# BERT model classification

In [1]:
!git clone -b docker https://github.com/yoheikikuta/bert.git

fatal: destination path 'bert' already exists and is not an empty directory.


In [2]:
!ls bert/

CONTRIBUTING.md		    extract_features.py   run_classifier.py
Dockerfile		    model		  run_pretraining.py
LICENSE			    modeling.py		  run_squad.py
README.md		    modeling_test.py	  sample_text.txt
__init__.py		    multilingual.md	  tmp
__pycache__		    optimization.py	  tokenization.py
create_pretraining_data.py  optimization_test.py  tokenization_test.py
data			    requirements.txt	  utils


### Model and data download

We solve RTE task in GLUE datasets; see https://www.nyu.edu/projects/bowman/glue.pdf in detail.

In [4]:
import os

In [5]:
os.makedirs("./bert/model", exist_ok=True)
os.makedirs("./bert/data", exist_ok=True)

In [6]:
!wget -O ./bert/model/uncased_L-12_H-768_A-12.zip https://storage.googleapis.com/bert_models/2018_10_18/uncased_L-12_H-768_A-12.zip

In [7]:
!unzip ./bert/model/uncased_L-12_H-768_A-12.zip -d ./bert/model/ && \
  rm ./bert/model/uncased_L-12_H-768_A-12.zip

In [8]:
!python3 ./bert/utils/download_glue_data.py --data_dir ./bert/data --tasks RTE

### Model fine-tuning

It takes about 3 hours in a `n1-standard-4` instance on GCP Compute Engine.

In [2]:
%%time

!python3 ./bert/run_classifier.py \
  --task_name=RTE \
  --do_train=true \
  --do_eval=true \
  --data_dir=./bert/data/RTE \
  --vocab_file=./bert/model/uncased_L-12_H-768_A-12/vocab.txt \
  --bert_config_file=./bert/model/uncased_L-12_H-768_A-12/bert_config.json \
  --init_checkpoint=./bert/model/uncased_L-12_H-768_A-12/bert_model.ckpt \
  --max_seq_length=128 \
  --train_batch_size=32 \
  --learning_rate=2e-5 \
  --num_train_epochs=3.0 \
  --output_dir=./bert/tmp/rte_output/

INFO:tensorflow:Using config: {'_num_ps_replicas': 0, '_train_distribute': None, '_tpu_config': TPUConfig(iterations_per_loop=1000, num_shards=8, num_cores_per_replica=None, per_host_input_for_training=3, tpu_job_name=None, initial_infeed_sleep_secs=None, input_partition_dims=None), '_keep_checkpoint_max': 5, '_is_chief': True, '_model_dir': './bert/tmp/rte_output/', '_save_summary_steps': 100, '_global_id_in_cluster': 0, '_task_id': 0, '_log_step_count_steps': None, '_protocol': None, '_cluster': None, '_num_worker_replicas': 1, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_device_fn': None, '_save_checkpoints_steps': 1000, '_task_type': 'worker', '_master': '', '_tf_random_seed': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fb2b93e50f0>, '_save_checkpoints_secs': None, '_eval_distribute': None, '_experimental_distribute': None, '_keep_chec

INFO:tensorflow:***** Running training *****
INFO:tensorflow:  Num examples = 2490
INFO:tensorflow:  Batch size = 32
INFO:tensorflow:  Num steps = 233
Instructions for updating:
Use `tf.data.experimental.map_and_batch(...)`.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Running train on CPU
INFO:tensorflow:*** Features ***
INFO:tensorflow:  name = input_ids, shape = (32, 128)
INFO:tensorflow:  name = input_mask, shape = (32, 128)
INFO:tensorflow:  name = label_ids, shape = (32,)
INFO:tensorflow:  name = segment_ids, shape = (32, 128)
INFO:tensorflow:**** Trainable Variables ****
INFO:tensorflow:  name = bert/embeddings/word_embeddings:0, shape = (30522, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/token_type_embeddings:0, shape = (2, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/position_embeddings:0, shape = (512, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow: 

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
2018-11-18 08:07:59.343665: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into ./bert/tmp/rte_output/model.ckpt.
INFO:tensorflow:global_step/sec: 0.0230017
INFO:tensorflow:examples/sec: 0.736054
INFO:tensorflow:global_step/sec: 0.0230105
INFO:tensorflow:examples/sec: 0.736337
INFO:tensorflow:Saving checkpoints for 233 into ./bert/tmp/rte_output/model.ckpt.
INFO:tensorflow:Loss for final step: 0.31156892.
INFO:tensorflow:training_loop marked as finished
INFO:tensorflow:Writing example 0 of 277
INFO:tensorflow:*** Example ***
INFO:tensorflow:guid: dev-0
INFO:tensorflow:tokens: [CLS] dana reeve , the widow of the actor christopher reeve , has died of 

INFO:tensorflow:***** Running evaluation *****
INFO:tensorflow:  Num examples = 277
INFO:tensorflow:  Batch size = 8
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Running eval on CPU
INFO:tensorflow:*** Features ***
INFO:tensorflow:  name = input_ids, shape = (?, 128)
INFO:tensorflow:  name = input_mask, shape = (?, 128)
INFO:tensorflow:  name = label_ids, shape = (?,)
INFO:tensorflow:  name = segment_ids, shape = (?, 128)
INFO:tensorflow:**** Trainable Variables ****
INFO:tensorflow:  name = bert/embeddings/word_embeddings:0, shape = (30522, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/token_type_embeddings:0, shape = (2, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/position_embeddings:0, shape = (512, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-11-18-10:57:23
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./bert/tmp/rte_output/model.ckpt-233
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-11-18-10:59:45
INFO:tensorflow:Saving dict for global step 233: eval_accuracy = 0.6931408, eval_loss = 0.71709377, global_step = 233, loss = 0.71939987
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 233: ./bert/tmp/rte_output/model.ckpt-233
INFO:tensorflow:evaluation_loop marked as finished
INFO:tensorflow:***** Eval results *****
INFO:tensorflow:  eval_accuracy = 0.6931408
INFO:tensorflow:  eval_loss = 0.71709377
INFO:tensorflow:  global_step = 233
INFO:tensorflow:  loss = 0.71939987
CPU times: user 4min 30s, sys: 33.3 s, total: 5min 4s
Wall time: 2h 52min 15s


## Data making for our patent data analysis.

In [1]:
import h5py
import pandas as pd
import numpy as np
import pickle

In [2]:
citations_info_target = pd.read_pickle("../data/citations_info_2000.df.gz")
training_app_df = pd.read_pickle("../data/training_app_1000.df.gz")
testset_app_df = pd.read_pickle("../data/testset_app_1000.df.gz")
grants_target_df = pd.read_pickle("../data/grants_for_2000.df.gz")

In [3]:
citations_info_target.head()

Unnamed: 0,app_id,app_fnm,citation_pat_pgpub_id,parsed,ifw_number,action_type,action_subtype,form892,form1449,citation_in_oa,...,rejection_103,rejection_112,rejection_dp,objection,allowed_claims,cite102_gt1,cite103_gt3,cite103_eq1,cite103_max,signature_type
0,13371769,/work/data/apps/2012/ipa120607/F_2322.xml,7391316,7391316,H20LX5QGPXXIFW4,103.0,a,1,0,1,...,1,0,1,0,0,0,0,1,2,0
1,13371769,/work/data/apps/2012/ipa120607/F_2322.xml,6992580,6992580,H20LX5QGPXXIFW4,102.0,a,1,1,1,...,1,0,1,0,0,0,0,1,2,0
2,13371769,/work/data/apps/2012/ipa120607/F_2322.xml,6992580,6992580,H20LX5QGPXXIFW4,103.0,a,1,1,1,...,1,0,1,0,0,0,0,1,2,0
3,13371769,/work/data/apps/2012/ipa120607/F_2322.xml,7774833,7774833,H20LX5QGPXXIFW4,103.0,a,1,1,1,...,1,0,1,0,0,0,0,1,2,0
4,12282000,/work/data/apps/2009/ipa090312/F_1385.xml,7411209,7411209,G9LENRJ8PPOPPY5,102.0,a,0,1,1,...,1,0,0,0,0,1,0,1,1,3


In [4]:
training_app_df.head()

Unnamed: 0,app_id,xml
0,14222691,"<us-patent-application lang=""EN"" dtd-version=""..."
1,12515852,"<us-patent-application lang=""EN"" dtd-version=""..."
2,12033424,"<us-patent-application lang=""EN"" dtd-version=""..."
3,12402344,"<us-patent-application lang=""EN"" dtd-version=""..."
4,12155425,"<us-patent-application lang=""EN"" dtd-version=""..."


In [5]:
import re
CLAIM_PAT = re.compile(r'<claims[^>]*>(.*)</claims>',re.MULTILINE|re.DOTALL)
TAG_PAT = re.compile(r"<.*?>")
LB_PAT = re.compile(r'[\t\n\r\f\v][" "]*')

def whole_xml_to_claim_xml(whole):
    mat = CLAIM_PAT.search(whole)
    return mat.group(1)
def whole_xml_to_claim(whole):
    return TAG_PAT.sub(' ', whole_xml_to_claim_xml(whole))

def remove_linebreak_from_claim(claim):
    '''
    Remove line break symbol "\n" with space(s).
    '''
    return LB_PAT.sub('', claim)

In [6]:
training_app_df["claim"] = training_app_df["xml"].map(whole_xml_to_claim).map(remove_linebreak_from_claim)
testset_app_df["claim"] = testset_app_df["xml"].map(whole_xml_to_claim).map(remove_linebreak_from_claim)
grants_target_df["claim"] = grants_target_df["xml"].map(whole_xml_to_claim).map(remove_linebreak_from_claim)

In [7]:
training_app_df["claim"][0]

'1 . A terminal comprising:an upper arm having a top surface for a mating area; a lower arm paralleled with the upper arm and having a bottom surface soldering area; and a connecting arm connected with the upper arm and the lower arm. 2 . The terminal as recited in  claim 1 , wherein the whole terminal is structured in a folded manner with a tiny gap therebetween in a vertical direction, and one of said upper arm and said lower arm forms a projection in said gap to abut against the other in a vertical direction. 3 . The terminal as recited in  claim 2 , wherein said one of the upper arm and the lower arm forms a recess corresponding to the projection in said vertical direction. 4 . The terminal as recited in  claim 1 , wherein the upper arm defines a convex plate formed on the top surface thereof and having a top surface, and the mating area is the top surface of the convex plate. 5 . The terminal as recited in  claim 4 , wherein the upper arm defines a recess in a bottom surface there

In [8]:
len( str(training_app_df["claim"][0]).split(" ") )

841

In [9]:
citations_info_target.head()

Unnamed: 0,app_id,app_fnm,citation_pat_pgpub_id,parsed,ifw_number,action_type,action_subtype,form892,form1449,citation_in_oa,...,rejection_103,rejection_112,rejection_dp,objection,allowed_claims,cite102_gt1,cite103_gt3,cite103_eq1,cite103_max,signature_type
0,13371769,/work/data/apps/2012/ipa120607/F_2322.xml,7391316,7391316,H20LX5QGPXXIFW4,103.0,a,1,0,1,...,1,0,1,0,0,0,0,1,2,0
1,13371769,/work/data/apps/2012/ipa120607/F_2322.xml,6992580,6992580,H20LX5QGPXXIFW4,102.0,a,1,1,1,...,1,0,1,0,0,0,0,1,2,0
2,13371769,/work/data/apps/2012/ipa120607/F_2322.xml,6992580,6992580,H20LX5QGPXXIFW4,103.0,a,1,1,1,...,1,0,1,0,0,0,0,1,2,0
3,13371769,/work/data/apps/2012/ipa120607/F_2322.xml,7774833,7774833,H20LX5QGPXXIFW4,103.0,a,1,1,1,...,1,0,1,0,0,0,0,1,2,0
4,12282000,/work/data/apps/2009/ipa090312/F_1385.xml,7411209,7411209,G9LENRJ8PPOPPY5,102.0,a,0,1,1,...,1,0,0,0,0,1,0,1,1,3


dev set in bert repository is corresponding to test set in our case.  
dev set includes label information and will not be used in training.  
(test set in bert does not inlude answer labels.)

Data creating procedure is the following:
- connect app_id and cited grant number
- get [app_id, claim, parsed]
- drop duplicates (duplication can exist because of different action types, etc)
- add cited label as 1

In [10]:
train_data_for_bert = pd.merge(training_app_df, citations_info_target, on='app_id')[['app_id', 'claim', 'parsed']]
dev_data_for_bert = pd.merge(testset_app_df, citations_info_target, on='app_id')[['app_id', 'claim', 'parsed']]

In [11]:
train_data_for_bert.head()

Unnamed: 0,app_id,claim,parsed
0,14222691,1 . A terminal comprising:an upper arm having ...,8179692
1,14222691,1 . A terminal comprising:an upper arm having ...,8179692
2,14222691,1 . A terminal comprising:an upper arm having ...,8206188
3,14222691,1 . A terminal comprising:an upper arm having ...,8206188
4,14222691,1 . A terminal comprising:an upper arm having ...,8177561


In [12]:
print( len(train_data_for_bert) )
print( len(dev_data_for_bert) )

2120
2059


In [13]:
train_data_for_bert = train_data_for_bert.drop_duplicates(keep='first').reset_index(drop=True)
dev_data_for_bert = dev_data_for_bert.drop_duplicates(keep='first').reset_index(drop=True)

In [14]:
print( len(train_data_for_bert) )
print( len(dev_data_for_bert) )

1282
1251


In [15]:
train_data_for_bert['label'] = "cited"
dev_data_for_bert['label'] = "cited"

In [16]:
train_data_for_bert.head()

Unnamed: 0,app_id,claim,parsed,label
0,14222691,1 . A terminal comprising:an upper arm having ...,8179692,cited
1,14222691,1 . A terminal comprising:an upper arm having ...,8206188,cited
2,14222691,1 . A terminal comprising:an upper arm having ...,8177561,cited
3,12515852,1 . A method for increasing seed yield in plan...,7235710,cited
4,12033424,"1 . An image forming apparatus, comprising:an ...",6950953,cited


In [17]:
train_data_for_bert = train_data_for_bert.merge(grants_target_df, how='inner', on='parsed')
train_data_for_bert = train_data_for_bert.drop("xml", axis=1)

dev_data_for_bert = dev_data_for_bert.merge(grants_target_df, how='inner', on='parsed')
dev_data_for_bert = dev_data_for_bert.drop("xml", axis=1)

In [18]:
train_data_for_bert.head()

Unnamed: 0,app_id,claim_x,parsed,label,claim_y
0,14222691,1 . A terminal comprising:an upper arm having ...,8179692,cited,"1. A board, comprising:a board body; a first c..."
1,14222691,1 . A terminal comprising:an upper arm having ...,8206188,cited,1. A connector terminal curved from a strip-sh...
2,14222691,1 . A terminal comprising:an upper arm having ...,8177561,cited,1. A socket contact terminal for electrical co...
3,12515852,1 . A method for increasing seed yield in plan...,7235710,cited,1. A method for expressing in a non-monocotyle...
4,12033424,"1 . An image forming apparatus, comprising:an ...",6950953,cited,"1. A multifunctional printer, comprising:a mai..."


In [19]:
def pick_up_unsited_grants(df, app_id, n=1, random_state=23):
    '''
    Randomly pick up uncited grant pair to a given app_id for generating negative samples.
    '''
    n_rows = df[ df['app_id'] != app_id ].sample(n=n, random_state=random_state)
    
    return [n_rows['parsed'].values[0], "not_cited" ,n_rows['claim_y'].values[0]]

In [20]:
seed = 23

train_non_cited_data = pd.DataFrame([
    [app_id, claimx] + pick_up_unsited_grants(train_data_for_bert, app_id, random_state=seed+idx)
    for idx, (app_id, claimx)
    in enumerate(zip(train_data_for_bert['app_id'], train_data_for_bert['claim_x']))
])

train_non_cited_data.columns = train_data_for_bert.columns

In [21]:
train_non_cited_data.head()

Unnamed: 0,app_id,claim_x,parsed,label,claim_y
0,14222691,1 . A terminal comprising:an upper arm having ...,7137410,not_cited,"1. A mixing valve having an exterior cover, sa..."
1,14222691,1 . A terminal comprising:an upper arm having ...,7419473,not_cited,1. A living body inspection apparatus comprisi...
2,14222691,1 . A terminal comprising:an upper arm having ...,7789044,not_cited,1. A collapsible pet carrier comprising:a tubu...
3,12515852,1 . A method for increasing seed yield in plan...,7702451,not_cited,1. A programmable engines-start system compris...
4,12033424,"1 . An image forming apparatus, comprising:an ...",8133762,not_cited,"1. A method of making a semiconductor device, ..."


In [22]:
seed = 23

dev_non_cited_data = pd.DataFrame([
    [app_id, claimx] + pick_up_unsited_grants(dev_data_for_bert, app_id, random_state=seed+idx)
    for idx, (app_id, claimx)
    in enumerate(zip(dev_data_for_bert['app_id'], dev_data_for_bert['claim_x']))
])

dev_non_cited_data.columns = dev_data_for_bert.columns

In [23]:
dev_non_cited_data.head()

Unnamed: 0,app_id,claim_x,parsed,label,claim_y
0,14307191,"1 . A method to aggregate, filter, and share e...",7729924,not_cited,1. A virtual knowledge management system using...
1,13137006,"1 . A display apparatus, comprising:a position...",8058137,not_cited,1. A method of manufacturing a semiconductor w...
2,12741959,1 - 33 . (canceled) 34 . A compound comprising...,7124864,not_cited,1. A gas assist strut and coupling member for ...
3,12643447,1 . A terminal fitting formed by bending an el...,6979130,not_cited,1. A bearing device for rotatably receiving a ...
4,14200253,1 . A printer for printing a three-dimensional...,6915265,not_cited,1. An integrated health care system for collec...


In [24]:
train_data_for_bert = pd.concat([train_data_for_bert, train_non_cited_data]).reset_index(drop=True)
dev_data_for_bert = pd.concat([dev_data_for_bert, dev_non_cited_data]).reset_index(drop=True)

In [25]:
train_data_for_bert.head()

Unnamed: 0,app_id,claim_x,parsed,label,claim_y
0,14222691,1 . A terminal comprising:an upper arm having ...,8179692,cited,"1. A board, comprising:a board body; a first c..."
1,14222691,1 . A terminal comprising:an upper arm having ...,8206188,cited,1. A connector terminal curved from a strip-sh...
2,14222691,1 . A terminal comprising:an upper arm having ...,8177561,cited,1. A socket contact terminal for electrical co...
3,12515852,1 . A method for increasing seed yield in plan...,7235710,cited,1. A method for expressing in a non-monocotyle...
4,12033424,"1 . An image forming apparatus, comprising:an ...",6950953,cited,"1. A multifunctional printer, comprising:a mai..."


In [26]:
train_data_for_bert['index'] = train_data_for_bert.index
dev_data_for_bert['index'] = dev_data_for_bert.index

In [27]:
train_data_for_bert = train_data_for_bert.drop("app_id", axis=1)
train_data_for_bert = train_data_for_bert.drop("parsed", axis=1)

dev_data_for_bert = dev_data_for_bert.drop("app_id", axis=1)
dev_data_for_bert = dev_data_for_bert.drop("parsed", axis=1)

In [28]:
train_data_for_bert.head()

Unnamed: 0,claim_x,label,claim_y,index
0,1 . A terminal comprising:an upper arm having ...,cited,"1. A board, comprising:a board body; a first c...",0
1,1 . A terminal comprising:an upper arm having ...,cited,1. A connector terminal curved from a strip-sh...,1
2,1 . A terminal comprising:an upper arm having ...,cited,1. A socket contact terminal for electrical co...,2
3,1 . A method for increasing seed yield in plan...,cited,1. A method for expressing in a non-monocotyle...,3
4,"1 . An image forming apparatus, comprising:an ...",cited,"1. A multifunctional printer, comprising:a mai...",4


In [29]:
train_data_for_bert = train_data_for_bert.loc[:, ['index', 'claim_x', 'claim_y', 'label']]
dev_data_for_bert = dev_data_for_bert.loc[:, ['index', 'claim_x', 'claim_y', 'label']]

In [30]:
train_data_for_bert.columns = ['index', 'claim_app', 'claim_cited_grant', 'label']
dev_data_for_bert.columns = ['index', 'claim_app', 'claim_cited_grant', 'label']

In [31]:
train_data_for_bert.head()

Unnamed: 0,index,claim_app,claim_cited_grant,label
0,0,1 . A terminal comprising:an upper arm having ...,"1. A board, comprising:a board body; a first c...",cited
1,1,1 . A terminal comprising:an upper arm having ...,1. A connector terminal curved from a strip-sh...,cited
2,2,1 . A terminal comprising:an upper arm having ...,1. A socket contact terminal for electrical co...,cited
3,3,1 . A method for increasing seed yield in plan...,1. A method for expressing in a non-monocotyle...,cited
4,4,"1 . An image forming apparatus, comprising:an ...","1. A multifunctional printer, comprising:a mai...",cited


In [32]:
dev_data_for_bert.head()

Unnamed: 0,index,claim_app,claim_cited_grant,label
0,0,"1 . A method to aggregate, filter, and share e...",1. A method for detecting moving objects with ...,cited
1,1,"1 . A display apparatus, comprising:a position...",1. A viewpoint position detecting apparatus fo...,cited
2,2,1 - 33 . (canceled) 34 . A compound comprising...,"1. A double-stranded ribonucleic acid (dsRNA),...",cited
3,3,1 . A terminal fitting formed by bending an el...,1. A female terminal fitting comprising:a subs...,cited
4,4,1 . A printer for printing a three-dimensional...,1. A method of generating an object assembled ...,cited


Save the result dataframe with tab separation.  
Manually upload the dataests onto google cloud storege.

Change label name in order to match RTE datasets case.

In [33]:
train_data_for_bert['label'] = train_data_for_bert['label'].str.replace("not_cited", "not_entailment")
train_data_for_bert['label'] = train_data_for_bert['label'].str.replace("cited", "entailment")

dev_data_for_bert['label'] = dev_data_for_bert['label'].str.replace("not_cited", "not_entailment")
dev_data_for_bert['label'] = dev_data_for_bert['label'].str.replace("cited", "entailment")

In [34]:
train_data_for_bert = train_data_for_bert.sample(frac=1, random_state=seed).reset_index(drop=True)
dev_data_for_bert = dev_data_for_bert.sample(frac=1, random_state=seed).reset_index(drop=True)

In [37]:
train_data_for_bert['index'] = train_data_for_bert.index
dev_data_for_bert['index'] = dev_data_for_bert.index

In [39]:
train_data_for_bert.to_csv("../data/bert_train_1000.tsv", index=False, sep='\t', header=True)
dev_data_for_bert.to_csv("../data/bert_dev_1000.tsv", index=False, sep='\t', header=True)

## Train a model.

Use colab because of TPU acceleration.

See https://colab.research.google.com/drive/1sTnSgh9LMxWyygbtgNibF-0m3j0owN1q.

### Train a lightgbm model for comparison.

In [1]:
import pandas as pd
import numpy as np

In [2]:
train_data = pd.read_csv("../data/bert_train_1000.tsv", sep="\t")
test_data = pd.read_csv("../data/bert_dev_1000.tsv", sep="\t")

In [3]:
train_data.head()

Unnamed: 0,index,claim_app,claim_cited_grant,label
0,0,1 . A process comprising the following steps:(...,"1. A liquid supply apparatus, comprising:a wal...",not_entailment
1,1,1 - 10 . (canceled) 11 . A method for open-loo...,"1. A fuel supply apparatus for an engine, comp...",entailment
2,2,1 . A handpiece for treating biological tissue...,1. A method for irradiating tissue having abso...,entailment
3,3,1 . A power cable comprising:a power input com...,1. A temperature regulating system for a vehic...,not_entailment
4,4,1 . A cutting insert having a substantially cu...,1. A toolholder comprising:a) a cutter body ro...,entailment


Create features by using TF-IDF vector.

raw data will be made as: [claim_app] + [claim_cited_grant] (simple concatenation)

In [4]:
import lightgbm as lgb
from sklearn.feature_extraction.text import TfidfVectorizer

In [5]:
import random
random.seed(23)

In [6]:
vectorizer = TfidfVectorizer(stop_words='english', min_df=2, max_df=0.8)

In [7]:
train_claim_text = [
    sentence_1 + sentence_2 
    for sentence_1, sentence_2 
    in zip(train_data['claim_app'], train_data['claim_cited_grant'])
]


test_claim_text = [
    sentence_1 + sentence_2 
    for sentence_1, sentence_2 
    in zip(test_data['claim_app'], test_data['claim_cited_grant'])
]

In [8]:
%%time

train_x = vectorizer.fit_transform(train_claim_text)
train_y = [ 1 if elem == 'entailment' else 0 for elem in train_data['label'] ] 

CPU times: user 3.43 s, sys: 0 ns, total: 3.43 s
Wall time: 3.43 s


In [9]:
train_x.shape

(2564, 17208)

In [10]:
%%time

test_x = vectorizer.transform(test_claim_text)
test_y = [ 1 if elem == 'entailment' else 0 for elem in test_data['label'] ] 

CPU times: user 3.54 s, sys: 4.87 ms, total: 3.55 s
Wall time: 3.54 s


In [11]:
test_x.shape

(2502, 17208)

Create dataset for lightgbm and train a model.

In [12]:
lgb_train = lgb.Dataset(train_x, train_y)

In [13]:
params = {
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': 'binary_logloss',
    'num_leaves': 50,
    'learning_rate': 0.05,
    'feature_fraction': 0.8,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'min_child_weight': 2,
    'gamma': 0.2,
    'verbose': 0
}

In [14]:
%%time

gbm = lgb.train(params,
                lgb_train,
                num_boost_round=40,
                valid_sets=lgb_train)

[1]	training's binary_logloss: 0.683679
[2]	training's binary_logloss: 0.674582
[3]	training's binary_logloss: 0.666456
[4]	training's binary_logloss: 0.657978
[5]	training's binary_logloss: 0.65061
[6]	training's binary_logloss: 0.640629
[7]	training's binary_logloss: 0.63159
[8]	training's binary_logloss: 0.622584
[9]	training's binary_logloss: 0.614358
[10]	training's binary_logloss: 0.606217
[11]	training's binary_logloss: 0.598234
[12]	training's binary_logloss: 0.590287
[13]	training's binary_logloss: 0.582836
[14]	training's binary_logloss: 0.575774
[15]	training's binary_logloss: 0.56923
[16]	training's binary_logloss: 0.562423
[17]	training's binary_logloss: 0.555749
[18]	training's binary_logloss: 0.549391
[19]	training's binary_logloss: 0.542548
[20]	training's binary_logloss: 0.536594
[21]	training's binary_logloss: 0.531169
[22]	training's binary_logloss: 0.524756
[23]	training's binary_logloss: 0.518757
[24]	training's binary_logloss: 0.513114
[25]	training's binary_loglo

Evaluate the trained model.

In [15]:
predict_prob = gbm.predict(test_x)

In [16]:
predict_label = [ 1 if elem >= 0.5 else 0 for elem in predict_prob]

In [17]:
acc = sum( np.array(predict_label) == np.array(test_y) ) / len(predict_label)

In [18]:
print("accuracy: {}".format(acc))

accuracy: 0.6622701838529177


It shows this problem is SOLVABLE (though accuracy is not so high).

## Inference using a trained BERT model.

In [1]:
import sys
sys.path.append("./bert")

In [2]:
import os

import modeling
import optimization
import tokenization
import tensorflow as tf

from run_classifier import RteProcessor
from run_classifier import model_fn_builder
from run_classifier import file_based_input_fn_builder
from run_classifier import file_based_convert_examples_to_features

### Predict all test data from the test.tsv

In [3]:
bert_config = modeling.BertConfig.from_json_file("./bert/model/uncased_L-12_H-768_A-12/bert_config.json")

In [4]:
class FLAGS(object):
    '''Parameters.'''
    def __init__(self):
        self.vocab_file = "./bert/model/uncased_L-12_H-768_A-12/vocab.txt"
        self.do_lower_case = True
        self.use_tpu = False
        self.output_dir = "../data/bert-training-results/patent_1000"
        self.data_dir = "../data"
        self.max_seq_length = 512
        self.init_checkpoint = "../data/bert-training-results/patent_1000/model.ckpt-1602"
        self.predict_batch_size = 16
        
        # The following parameters are not used in predictions.
        # Just use to create RunConfig.
        self.master = None
        self.save_checkpoints_steps = 1
        self.iterations_per_loop = 1
        self.num_tpu_cores = 1
        self.learning_rate = 0
        self.num_warmup_steps = 0
        self.num_train_steps = 0
        self.train_batch_size = 0
        self.eval_batch_size = 0

In [5]:
FLAGS = FLAGS()

In [6]:
processor = RteProcessor()
label_list = processor.get_labels()

In [7]:
tokenizer = tokenization.FullTokenizer(
    vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

tpu_cluster_resolver = None

is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2

run_config = tf.contrib.tpu.RunConfig(
    cluster=tpu_cluster_resolver,
    master=FLAGS.master,
    model_dir=FLAGS.output_dir,
    save_checkpoints_steps=FLAGS.save_checkpoints_steps,
    tpu_config=tf.contrib.tpu.TPUConfig(
        iterations_per_loop=FLAGS.iterations_per_loop,
        num_shards=FLAGS.num_tpu_cores,
        per_host_input_for_training=is_per_host))

Preapare a test dataset which is the same as dev data in the training.  
Need to prepare the name `test.tsv`.

In [8]:
!cp ../data/bert_dev_1000.tsv ../data/test.tsv

In [9]:
model_fn = model_fn_builder(
    bert_config=bert_config,
    num_labels=len(label_list),
    init_checkpoint=FLAGS.init_checkpoint,
    learning_rate=FLAGS.learning_rate,
    num_train_steps=FLAGS.num_train_steps,
    num_warmup_steps=FLAGS.num_warmup_steps,
    use_tpu=FLAGS.use_tpu,
    use_one_hot_embeddings=FLAGS.use_tpu)


estimator = tf.contrib.tpu.TPUEstimator(
    use_tpu=FLAGS.use_tpu,
    model_fn=model_fn,
    config=run_config,
    train_batch_size=FLAGS.train_batch_size,
    eval_batch_size=FLAGS.eval_batch_size,
    predict_batch_size=FLAGS.predict_batch_size)

INFO:tensorflow:Using config: {'_cluster': None, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_train_distribute': None, '_num_worker_replicas': 1, '_task_type': 'worker', '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f0aa838af28>, '_is_chief': True, '_master': '', '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_tpu_config': TPUConfig(iterations_per_loop=1, num_shards=1, num_cores_per_replica=None, per_host_input_for_training=3, tpu_job_name=None, initial_infeed_sleep_secs=None, input_partition_dims=None), '_save_checkpoints_steps': 1, '_log_step_count_steps': None, '_save_checkpoints_secs': None, '_protocol': None, '_evaluation_master': '', '_experimental_distribute': None, '_global_id_in_cluster': 0, '_eval_distribute': None, '_task_id': 0, '_service': None, '_save_summary_steps': 100, '_tf_random_seed': None, '_device_fn': None, '_keep_checkpoint_every_n_hours': 100

Read test dataset.

In [10]:
%%time

predict_examples = processor.get_test_examples(FLAGS.data_dir)

predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")

file_based_convert_examples_to_features(predict_examples, label_list,
                                        FLAGS.max_seq_length, tokenizer,
                                        predict_file)

predict_drop_remainder = True if FLAGS.use_tpu else False

predict_input_fn = file_based_input_fn_builder(
    input_file=predict_file,
    seq_length=FLAGS.max_seq_length,
    is_training=False,
    drop_remainder=predict_drop_remainder)

INFO:tensorflow:Writing example 0 of 2
INFO:tensorflow:*** Example ***
INFO:tensorflow:guid: test-0
INFO:tensorflow:tokens: [CLS] 1 . a rotational angle detection device for a permanent magnet dynamo - electric machine comprising : a stat ##or that includes arm ##at ##ure winding ; a rotor that includes a rotor core and a plurality of permanent magnet ##s ; and a conduct ##ive circuit that includes first conductors extending in an axial direction of the rotor and being disposed in at least two places in a ci ##rc ##um ##fer ##ential direction of the rotor , and second conductors for electrically inter ##con ##ne ##cting the first conductors , wherein , the first conductors are each disposed within one of a range of an electrical angle between & # x ##22 ##12 ; 45 & # x ##b ##0 ; and + 45 & # x ##b ##0 ; in the ci ##rc ##um ##fer ##ential direction with a magnetic pole center of each of the plurality of permanent magnet ##s being set as a reference and a range of an electrical angle bet

INFO:tensorflow:input_ids: 101 1015 1012 1037 4118 9605 1024 13851 16175 11522 3695 13113 1006 14925 2290 1007 2951 2012 2019 14925 2290 13617 2291 1025 6364 1996 10596 14925 2290 2951 2012 1996 14925 2290 13617 2291 2000 5646 2028 2030 2062 5005 5300 1010 16726 2169 5005 3643 2003 24668 1997 1037 10903 1997 1037 6922 1997 1996 10596 14925 2290 2951 2008 2003 9280 3141 2000 1037 2512 1011 2540 3786 3120 1025 1998 6016 1996 2028 2030 2062 5005 5300 2000 2019 12978 13460 23416 2075 2291 2000 13460 23416 1996 14925 2290 13617 2291 1012 1016 1012 1996 4118 1997 4366 1015 1010 16726 1996 12978 13460 23416 2075 2291 10438 2019 4106 1997 1996 2028 2030 2062 5005 5300 1998 14847 1037 3120 1997 1037 3327 6922 1997 1996 10596 14925 2290 2951 2008 2003 3141 2000 1037 3327 2512 1011 2540 3786 3120 2241 2006 1996 4106 1012 1017 1012 1996 4118 1997 4366 1016 1010 2582 9605 1010 2044 6016 1996 2028 2030 2062 5005 5300 1010 4909 2019 7953 1998 1010 26651 2000 1996 7953 1010 26709 6593 17441 2028 2030 

In [11]:
result = estimator.predict(input_fn=predict_input_fn)

In [14]:
#It will take about one hour on a CPU environment.

result = list(result)

In [15]:
len(result)

Expected: 2502

In [17]:
import pandas as pd

In [104]:
test_df = pd.read_csv("../data/test.tsv", sep='\t')

In [109]:
test_df.head()

Unnamed: 0,index,claim_app,claim_cited_grant,label
0,0,1 . A rotational angle detection device for a ...,1. A tangible computer-readable medium having ...,not_entailment
1,1,1 . A method comprising:sensing electrocardiog...,"1. A medical system, comprising:an implantable...",entailment
2,2,1 . A system for controlling temperature insid...,1. An integrated differential receiver for an ...,not_entailment
3,3,1 . A recording apparatus for recording update...,1. A method for indicating a priority of a Voi...,not_entailment
4,4,1 . Apparatus for moving a material within an ...,1. A wellbore packer having an expandable pack...,entailment


#### Label { 0: not cited, 1: cited }

In [110]:
test_df['label'] = [ 0 if elem == 'not_entailment' else 1 for elem in test_df['label'] ]

In [111]:
test_df.head()

Unnamed: 0,index,claim_app,claim_cited_grant,label
0,0,1 . A rotational angle detection device for a ...,1. A tangible computer-readable medium having ...,0
1,1,1 . A method comprising:sensing electrocardiog...,"1. A medical system, comprising:an implantable...",1
2,2,1 . A system for controlling temperature insid...,1. An integrated differential receiver for an ...,0
3,3,1 . A recording apparatus for recording update...,1. A method for indicating a priority of a Voi...,0
4,4,1 . Apparatus for moving a material within an ...,1. A wellbore packer having an expandable pack...,1


In [114]:
test_df['prediction_score'] = result

In [115]:
test_df.head()

Unnamed: 0,index,claim_app,claim_cited_grant,label,prediction_score
0,0,1 . A rotational angle detection device for a ...,1. A tangible computer-readable medium having ...,0,"[0.999931, 6.906676e-05]"
1,1,1 . A method comprising:sensing electrocardiog...,"1. A medical system, comprising:an implantable...",1,"[4.950189e-05, 0.9999505]"
2,2,1 . A system for controlling temperature insid...,1. An integrated differential receiver for an ...,0,"[0.99992955, 7.043116e-05]"
3,3,1 . A recording apparatus for recording update...,1. A method for indicating a priority of a Voi...,0,"[0.99994314, 5.6807927e-05]"
4,4,1 . Apparatus for moving a material within an ...,1. A wellbore packer having an expandable pack...,1,"[6.120109e-05, 0.99993885]"


Save the results.

In [116]:
test_df.to_pickle("../data/bert-training-results/patent_1000/predict_result_df.pkl")

Read the results.

In [18]:
test_df = pd.read_pickle("../data/bert-training-results/patent_1000/predict_result_df.pkl")

In [19]:
test_df.head()

Unnamed: 0,index,claim_app,claim_cited_grant,label,prediction_score
0,0,1 . A rotational angle detection device for a ...,1. A tangible computer-readable medium having ...,0,"[0.999931, 6.906676e-05]"
1,1,1 . A method comprising:sensing electrocardiog...,"1. A medical system, comprising:an implantable...",1,"[4.950189e-05, 0.9999505]"
2,2,1 . A system for controlling temperature insid...,1. An integrated differential receiver for an ...,0,"[0.99992955, 7.043116e-05]"
3,3,1 . A recording apparatus for recording update...,1. A method for indicating a priority of a Voi...,0,"[0.99994314, 5.6807927e-05]"
4,4,1 . Apparatus for moving a material within an ...,1. A wellbore packer having an expandable pack...,1,"[6.120109e-05, 0.99993885]"


In [20]:
test_df['prediction_label'] = [
    1 if score_1 >= 0.5 else 0 for score_0, score_1 in test_df['prediction_score']
]

In [21]:
accuracy = len( test_df[test_df['label'] == test_df['prediction_label']] ) / len( test_df['label'] )
precision = len( test_df[(test_df['label'] == 1) & (test_df['prediction_label'] == 1)] ) / sum( test_df['label'] == 1 )
recall = len( test_df[(test_df['label'] == 1) & (test_df['prediction_label'] == 1)] ) / sum( test_df['prediction_label'] == 1 )

In [22]:
print(accuracy)
print(precision)
print(recall)

0.9372501998401279
0.9232613908872902
0.9498355263157895


Check the result visually.

In [23]:
test_df.loc[0,['claim_app']].values

array(['1 . A rotational angle detection device for a permanent magnet dynamo-electric machine comprising:a stator that includes armature winding; a rotor that includes a rotor core and a plurality of permanent magnets; and a conductive circuit that includes first conductors extending in an axial direction of the rotor and being disposed in at least two places in a circumferential direction of the rotor, and second conductors for electrically interconnecting the first conductors, wherein, the first conductors are each disposed within one of a range of an electrical angle between &#x2212;45&#xb0; and +45&#xb0; in the circumferential direction with a magnetic pole center of each of the plurality of permanent magnets being set as a reference and a range of an electrical angle between &#x2212;45&#xb0; and +45&#xb0; with a position apart from the magnetic pole center by an electrical angle of 90&#xb0; being set as a reference, the conductive circuit being disposed in at least one place in t

In [24]:
test_df.loc[0,['claim_cited_grant']].values

array(['1. A tangible computer-readable medium having stored thereon computer-executable instructions for implementing a customizable visual user interface on a screen display of a telephony device comprising:an application program for providing services to a user on the telephony device; a shell program that exposes an application programming interface for customizing the customizable visual user interface; a message center operatively connected to the shell program for providing access to a user message; and a telephony module for managing telephony functions, wherein the application programming interface exposed by the shell program comprises one or more methods, the application programming interface receiving input from an application, and wherein execution of the one or more methods of the application programming interface is responsive to the input from the application for customizing the customizable visual user interface,wherein the telephony module includes an operator agent f

In [25]:
test_df.loc[1,['claim_app']].values

array(['1 . A method comprising:sensing electrocardiogram (ECG) data at an ECG sensor system; processing the sensed ECG data at the ECG sensor system to determine one or more noise values, wherein each noise value is indicative of a measurement of a component of the sensed ECG data that is potentially related to a non-heart beat source; and sending the one or more noise values to an automated troubleshooting system to troubleshoot the ECG sensor system. 2 . The method of  claim 1 , wherein the automated troubleshooting system performs an analysis of the one or more noise values and identifies a source of a particular component of the sensed ECG data that is related to a particular non-heart beat source based on the analysis. 3 . The method of  claim 2 , further comprising, after sending the one or more noise values, receiving an input and, responsive to the input, deactivating one or more circuit elements of the ECG sensor system to reduce the one or more noise values associated with t

In [26]:
test_df.loc[1,['claim_cited_grant']].values

array(['1. A medical system, comprising:an implantable device including a housing and a signal storage element enclosed in the housing, the housing forming an electrode of the device; an implantable medical electrical lead including a first electrode and a second electrode, the first and second electrodes forming a sensing pair when the lead is coupled to the device; and a device processor enclosed in the housing and programmed to detect noise on the sensing pair, identify the noise as system noise, and store at least one noisy EGM sample from the sensing pair and a concurrent EGM sample from each of a plurality of recording pairs in response to identifying the system noise, wherein the plurality of recording pairs includes a first pair formed by the first lead electrode and the device electrode, when the lead is coupled to the device, and a second pair formed by the second lead electrode and the device electrode, when the lead is coupled to the device. 2. The system of  claim 1 , furt

Is it ... okay?

### Predict one sample.

input data was created by
- `file_based_input_fn_builder`
- <- Read file created by `file_based_convert_examples_to_features`

In [27]:
import collections

In [28]:
from run_classifier import convert_single_example
from run_classifier import InputExample
from run_classifier import file_based_input_fn_builder

In [29]:
example = InputExample(
    guid = 0,
    text_a = "This is a test. text a represents claims in an application.",
    text_b = "This is a test. test b stands for claims in a grant. 実は日本語もおk",
    label = "not_entailment"
)

In [30]:
feature = convert_single_example(
    ex_index=0, 
    example=example,
    label_list=label_list,
    max_seq_length=FLAGS.max_seq_length,
    tokenizer=tokenizer)

INFO:tensorflow:*** Example ***
INFO:tensorflow:guid: 0
INFO:tensorflow:tokens: [CLS] this is a test . text a represents claims in an application . [SEP] this is a test . test b stands for claims in a grant . [UNK] は 日 本 語 も ##お ##k [SEP]
INFO:tensorflow:input_ids: 101 2023 2003 1037 3231 1012 3793 1037 5836 4447 1999 2019 4646 1012 102 2023 2003 1037 3231 1012 3231 1038 4832 2005 4447 1999 1037 3946 1012 100 1672 1864 1876 1950 1681 30176 2243 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

In [31]:
def create_int_feature(values):
    f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
    return f

In [32]:
features = collections.OrderedDict()
features["input_ids"] = create_int_feature(feature.input_ids)
features["input_mask"] = create_int_feature(feature.input_mask)
features["segment_ids"] = create_int_feature(feature.segment_ids)
features["label_ids"] = create_int_feature([feature.label_id])

tf_example = tf.train.Example(features=tf.train.Features(feature=features))

In [33]:
filename = "../data/bert-training-results/patent_1000/tmp.tf_record"

In [34]:
writer = tf.python_io.TFRecordWriter(filename)
writer.write(tf_example.SerializeToString())
writer.close()

In [37]:
input_fn = file_based_input_fn_builder(
    input_file=filename,
    seq_length=FLAGS.max_seq_length,
    is_training=False,
    drop_remainder=False)

result = estimator.predict(input_fn=input_fn)
result = list(result)

In [38]:
result

[array([3.6324293e-04, 9.9963677e-01], dtype=float32)]

In [39]:
os.remove(filename)

## Trial and Errors.

In [25]:
MAX_SEQ_LENGTH = 512

In [26]:
processor = RteProcessor()
label_list = processor.get_labels()

In [27]:
tokenizer = tokenization.FullTokenizer(
  vocab_file="./bert/model/uncased_L-12_H-768_A-12/vocab.txt", do_lower_case=True)

In [28]:
run_config = tf.estimator.RunConfig(model_dir="../data/bert-training-results/patent_1000")

In [29]:
run_config = tf.contrib.tpu.RunConfig()

In [30]:
model_fn = model_fn_builder(
  bert_config=bert_config,
  num_labels=len(label_list),
  init_checkpoint="../data/bert-training-results/patent_1000/",
  learning_rate=0,
  num_train_steps=0,
  num_warmup_steps=0,
  use_tpu=False,
  use_one_hot_embeddings=False)

In [31]:
# estimator = tf.estimator.Estimator(
#     model_fn=model_fn,
#     config=run_config)

In [32]:
estimator = tf.contrib.tpu.TPUEstimator(
  use_tpu=False,
  model_fn=model_fn,
  config=run_config,
  train_batch_size=1,
  eval_batch_size=1,
  predict_batch_size=1)

INFO:tensorflow:Using config: {'_master': '', '_tf_random_seed': None, '_num_worker_replicas': 1, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_device_fn': None, '_session_config': None, '_save_summary_steps': 100, '_global_id_in_cluster': 0, '_num_ps_replicas': 0, '_evaluation_master': '', '_task_type': 'worker', '_is_chief': True, '_service': None, '_keep_checkpoint_max': 5, '_model_dir': '/tmp/tmpl3oazqub', '_train_distribute': None, '_keep_checkpoint_every_n_hours': 10000, '_task_id': 0, '_log_step_count_steps': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fbf96241198>, '_cluster': None, '_tpu_config': TPUConfig(iterations_per_loop=2, num_shards=None, num_cores_per_replica=None, per_host_input_for_training=2, tpu_job_name=None, initial_infeed_sleep_secs=None)}
INFO:tensorflow:_TPUContext: eval_on_tpu True


In [18]:
import os

In [19]:
eval_file = os.path.join("../data/bert-training-results/patent_1000", "eval.tf_record")

In [20]:
eval_input_fn = file_based_input_fn_builder(
    input_file=eval_file,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)

In [22]:
# result = estimator.evaluate(input_fn=eval_input_fn)

In [1]:
import tensorflow as tf