In [14]:
import pandas as pd
import tensorflow as tf
from nltk import wordpunct_tokenize
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import PolynomialDecay
from transformers import AutoTokenizer, TFAutoModelForTokenClassification, DataCollatorForTokenClassification, create_optimizer
from datasets import ClassLabel, Sequence, Dataset

In [2]:
lab2id = {',':1,'.':2,'!':3,'NaN':0}
id2lab = {v:k for k,v in lab2id.items()}

In [3]:
df = pd.read_csv('review-Copy1.csv')
df = df.dropna(axis=0, subset=['reviewText'])
df = df[["reviewText"]]
dataset = Dataset.from_pandas(df)
# dataset = dataset.train_test_split(test_size=0.2)

In [4]:
def process(text):
    lab, tokens = [], []
    tok = wordpunct_tokenize(text['reviewText'])
    while tok[0] in lab2id.keys():
        del tok[0]
    for i in range(len(tok)):
        if tok[i] in lab2id.keys():
            lab[-1] = lab2id[tok[i]]
        else:
            lab.append(0)
            tokens.append(tok[i])
    return {'tokens':tokens, 'tag':lab}

In [5]:
dataset = dataset.map(process)

  0%|          | 0/55014 [00:00<?, ?ex/s]

In [6]:
dataset.features['tag']=Sequence(feature=ClassLabel(num_classes=4, names=['Nan', ',', '.', '!']))

In [7]:
dataset = dataset.train_test_split(test_size=0.2)

In [8]:
model_checkpoint = "bert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = TFAutoModelForTokenClassification.from_pretrained(model_checkpoint, id2label=id2lab, label2id=lab2id, from_pt=True)
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer, return_tensors="tf")

All PyTorch model weights were used when initializing TFBertForTokenClassification.

Some weights or buffers of the TF 2.0 model TFBertForTokenClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
def align_labels_with_tokens(labels, word_ids):
    new_labels = []
    current_word = None
    for word_id in word_ids:
        if word_id != current_word:
            # Start of a new word!
            current_word = word_id
            label = -100 if word_id is None else labels[word_id]
            new_labels.append(label)
        elif word_id is None:
            # Special token
            new_labels.append(-100)
        else:
            # Same word as previous token
            label = labels[word_id]
            # If the label is B-XXX we change it to I-XXX
            if label % 2 == 1:
                label += 1
            new_labels.append(label)

    return new_labels

In [10]:
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(
        examples["tokens"], truncation=True, is_split_into_words=True
    )
    all_labels = examples["tag"]
    new_labels = []
    for i, labels in enumerate(all_labels):
        word_ids = tokenized_inputs.word_ids(i)
        new_labels.append(align_labels_with_tokens(labels, word_ids))

    tokenized_inputs["labels"] = new_labels
    return tokenized_inputs

In [11]:
tok_df = dataset.map(
    tokenize_and_align_labels,
    batched=True,
    remove_columns=dataset["train"].column_names,
)

  0%|          | 0/45 [00:00<?, ?ba/s]

  0%|          | 0/12 [00:00<?, ?ba/s]

In [12]:
tf_train_df = tok_df["train"].to_tf_dataset(
    columns=['attention_mask', 'input_ids', 'labels', 'token_type_ids'],
    shuffle=True,
    collate_fn=data_collator,
    batch_size=8,
)
tf_test_df = tok_df["test"].to_tf_dataset(
    columns=['attention_mask', 'input_ids', 'labels', 'token_type_ids'],
    shuffle=False,
    collate_fn=data_collator,
    batch_size=8,
)

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  tensor = as_tensor(value)


In [15]:
tf.keras.mixed_precision.set_global_policy("mixed_float16")

num_epochs = 3
num_train_steps = len(tf_train_df) * num_epochs

optimizer, schedule = create_optimizer(
    init_lr=2e-5,
    num_warmup_steps=0,
    num_train_steps=num_train_steps,
    weight_decay_rate=0.01,
)
model.compile(optimizer=optimizer)

No loss specified in compile() - the model's internal loss computation will be used as the loss. Don't panic - this is a common way to train TensorFlow models in Transformers! To disable this behaviour please pass a loss argument, or explicitly pass `loss=None` if you do not want your model to compute a loss.


In [17]:
model.fit(
    tf_train_df,
#     callbacks=[callback],
    epochs=num_epochs,
)

Epoch 1/3
   1/5502 [..............................] - ETA: 186:23:12 - loss: 1.3677

ResourceExhaustedError: Graph execution error:

Detected at node 'tf_bert_for_token_classification/bert/encoder/layer_._6/intermediate/Gelu/mul' defined at (most recent call last):
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\traitlets\config\application.py", line 982, in launch_instance
      app.start()
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\ipykernel\kernelapp.py", line 712, in start
      self.io_loop.start()
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\tornado\platform\asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\asyncio\base_events.py", line 600, in run_forever
      self._run_once()
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\asyncio\base_events.py", line 1896, in _run_once
      handle._run()
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\ipykernel\kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\ipykernel\kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\ipykernel\kernelbase.py", line 406, in dispatch_shell
      await result
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\ipykernel\kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\ipykernel\ipkernel.py", line 383, in do_execute
      res = shell.run_cell(
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\ipykernel\zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\IPython\core\interactiveshell.py", line 2940, in run_cell
      result = self._run_cell(
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\IPython\core\interactiveshell.py", line 2995, in _run_cell
      return runner(coro)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\IPython\core\interactiveshell.py", line 3194, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\IPython\core\interactiveshell.py", line 3373, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\IPython\core\interactiveshell.py", line 3433, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\Wonder-David Efe\AppData\Local\Temp\ipykernel_21956\479624844.py", line 1, in <module>
      model.fit(
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\engine\training.py", line 1564, in fit
      tmp_logs = self.train_function(iterator)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\engine\training.py", line 1160, in train_function
      return step_function(self, iterator)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\engine\training.py", line 1146, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\engine\training.py", line 1135, in run_step
      outputs = model.train_step(data)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\transformers\modeling_tf_utils.py", line 1495, in train_step
      y_pred = self(x, training=True)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\engine\training.py", line 557, in __call__
      return super().__call__(*args, **kwargs)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\transformers\modeling_tf_utils.py", line 1881, in run_call_with_unpacked_inputs
      # Update base model and current model config
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 1892, in call
      outputs = self.bert(
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\transformers\modeling_tf_utils.py", line 1881, in run_call_with_unpacked_inputs
      # Update base model and current model config
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 873, in call
      encoder_outputs = self.encoder(
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 558, in call
      for i, layer_module in enumerate(self.layer):
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 564, in call
      layer_outputs = layer_module(
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 520, in call
      intermediate_output = self.intermediate(hidden_states=attention_output)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\transformers\models\bert\modeling_tf_bert.py", line 424, in call
      hidden_states = self.intermediate_act_fn(hidden_states)
    File "C:\Users\Wonder-David Efe\anaconda3\envs\nlpenv\lib\site-packages\keras\activations.py", line 359, in gelu
      return tf.nn.gelu(x, approximate)
Node: 'tf_bert_for_token_classification/bert/encoder/layer_._6/intermediate/Gelu/mul'
OOM when allocating tensor with shape[8,491,3072] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator cpu
	 [[{{node tf_bert_for_token_classification/bert/encoder/layer_._6/intermediate/Gelu/mul}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_27437]