In [1]:
from datasets import load_dataset

# Hugging Face Hub identifier of the SNIPS joint intent/slot dataset.
dataset_id = "bkonkle/snips-joint-intent"
ds = load_dataset(dataset_id)

In [2]:
# Pull the two predefined splits out of the dataset dictionary.
train = ds['train']
test = ds['test']

In [3]:
# Convert the training split to a pandas object for inspection and label encoding.
train_pd = train.to_pandas()

In [4]:
# Preview the first five rows (input text, intent name, slot tags).
train_pd.head(n=5)

Unnamed: 0,input,intent,slots
0,listen to westbam alumb allergic on google music,PlayMusic,O O B-artist O B-album O B-service I-service
1,add step to me to the 50 clásicos playlist,AddToPlaylist,O B-entity_name I-entity_name I-entity_name O ...
2,i give this current textbook a rating value of...,RateBook,O O O B-object_select B-object_type O O O O B-...
3,play the song little robin redbreast,PlayMusic,O O B-music_item B-track I-track I-track
4,please add iris dement to my playlist this is ...,AddToPlaylist,O O B-artist I-artist O B-playlist_owner O B-p...


In [5]:
# Map every distinct intent name to an integer id, numbered in order of
# first appearance in the training frame.
labels = {
    intent_name: intent_id
    for intent_id, intent_name in enumerate(train_pd['intent'].unique())
}
labels

{'PlayMusic': 0,
 'AddToPlaylist': 1,
 'RateBook': 2,
 'SearchScreeningEvent': 3,
 'BookRestaurant': 4,
 'GetWeather': 5,
 'SearchCreativeWork': 6}

In [6]:
# Work on an independent copy so the raw training frame keeps its string intents.
ds_copy = train_pd.copy(deep=True)

In [7]:
# Encode the intent names as integer ids.
# The source column is read from the untouched `train_pd` frame rather than
# from `ds_copy` itself, so this cell is idempotent: re-running it no longer
# tries to look up already-encoded integers in `labels` (which raised KeyError).
ds_copy['intent'] = train_pd['intent'].map(labels)

In [8]:
# Preview the first five rows to confirm the intent column now holds integer ids.
ds_copy.head(n=5)

Unnamed: 0,input,intent,slots
0,listen to westbam alumb allergic on google music,0,O O B-artist O B-album O B-service I-service
1,add step to me to the 50 clásicos playlist,1,O B-entity_name I-entity_name I-entity_name O ...
2,i give this current textbook a rating value of...,2,O O O B-object_select B-object_type O O O O B-...
3,play the song little robin redbreast,0,O O B-music_item B-track I-track I-track
4,please add iris dement to my playlist this is ...,1,O O B-artist I-artist O B-playlist_owner O B-p...


In [4]:
from transformers import DistilBertTokenizer, TFDistilBertModel

# Load the pretrained tokenizer and the TensorFlow DistilBERT encoder.
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
transformer = TFDistilBertModel.from_pretrained("distilbert-base-uncased")

text = ["Machine learning is fascinating.", "so what you say?"]

# Use "tf" here because you are working with TensorFlow.
# padding=True lets sentences of different token lengths be stacked into one
# rectangular tensor; without it this call only works because these two
# sentences happen to tokenize to the same length.
inputs = tokenizer(text, padding=True, return_tensors="tf")

# Forward pass
outputs = transformer(**inputs)

# One hidden vector per token: (batch, sequence_length, hidden_size).
last_hidden_state = outputs.last_hidden_state
print(last_hidden_state.shape)


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertModel: ['vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_transform.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight']
- This IS expected if you are initializing TFDistilBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFDistilBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertModel for predictions without further training.


(2, 7, 768)


In [5]:
import tensorflow as tf 

class DistilBERTClassifier(tf.keras.Model):
    """Transformer encoder followed by a single softmax classification layer."""

    def __init__(self, transformer, hidden_units, num_labels):
        """Store the encoder and create the classification head.

        Args:
            transformer: Encoder model; the first element of its output is
                indexed as (batch, position, features) in ``call``.
            hidden_units: Accepted for interface compatibility but not used
                by the current architecture (there is no hidden layer).
            num_labels: Number of output units of the softmax layer.
        """
        super().__init__()
        self.transformer = transformer
        self.classifier = tf.keras.layers.Dense(num_labels, activation='softmax')

    def call(self, inputs):
        """Return softmax scores computed from the first token position.

        Args:
            inputs: Whatever the wrapped encoder accepts; the notebook passes
                a dict with ``input_ids`` and ``attention_mask``.

        Returns:
            The output of the softmax layer, one row per input sequence.
        """
        encoder_states = self.transformer(inputs)[0]
        # Keep only position 0 of every sequence (the [CLS] slot for
        # BERT-style tokenizers) as the sentence representation.
        first_token_state = encoder_states[:, 0, :]
        return self.classifier(first_token_state)

# Model hyper-parameters: one output unit per intent class; `hidden_units`
# is forwarded to the constructor, which currently does not use it.
num_labels, hidden_units = 7, 50
model = DistilBERTClassifier(
    transformer=transformer,
    hidden_units=hidden_units,
    num_labels=num_labels,
)

In [11]:
# Row boundaries of the three consecutive subsets. All of them are carved out
# of the encoded training frame; the dataset's own `test` split is not used
# in the cells shown here.
train_end, valid_end = 10_000, 12_000

feature = ds_copy['input']
label = ds_copy['intent']

X_train, y_train = feature.iloc[:train_end], label.iloc[:train_end]
X_valid, y_valid = feature.iloc[train_end:valid_end], label.iloc[train_end:valid_end]
X_test, y_test = feature.iloc[valid_end:], label.iloc[valid_end:]

In [12]:
def _encode(sentences):
    """Tokenize a list of sentences into padded, truncated TensorFlow tensors."""
    return tokenizer(sentences, truncation=True, padding=True, return_tensors='tf')


# Sparse cross-entropy matches the integer-encoded intents and the softmax
# output of the classifier.
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5),
    metrics=['accuracy'],
)

# NOTE: X_valid and y_valid are rebound from pandas Series to tensors below,
# so re-running this cell requires re-running the split cell first.
text_set = _encode(X_train.to_list())
X_valid = _encode(X_valid.to_list())

target_set = tf.convert_to_tensor(y_train.to_list())
y_valid = tf.convert_to_tensor(y_valid.to_list())



In [13]:
# Feed only the two tensors the classifier consumes.
model_input_keys = ('input_ids', 'attention_mask')
train_inputs = {key: text_set[key] for key in model_input_keys}
valid_inputs = {key: X_valid[key] for key in model_input_keys}

# Single fine-tuning epoch, validated on the held-out slice.
history = model.fit(
    x=train_inputs,
    y=target_set,
    batch_size=64,
    epochs=1,
    validation_data=(valid_inputs, y_valid),
)



In [14]:
# Quick sanity check of the fine-tuned model on one hand-written sentence.
text = ['this is sample']

inputs = tokenizer(text, return_tensors="tf", padding=True, truncation=True)

# Forward pass: one row of class probabilities per input sentence.
sample_batch = {
    'input_ids': inputs['input_ids'],
    'attention_mask': inputs['attention_mask'],
}
outputs = model.predict(sample_batch)



In [23]:
# Persist the model weights (not the architecture) to an HDF5 file.
weights_path = "filler_s_transformer.h5"
model.save_weights(weights_path)

In [7]:
# Dummy batch used only to build the model's variables:
# batch_size=1, sequence_length=10.
dummy_shape = (1, 10)
dummy_input_ids = tf.ones(dummy_shape, dtype=tf.int32)
dummy_attention_mask = tf.ones(dummy_shape, dtype=tf.int32)

new_model = DistilBERTClassifier(transformer, hidden_units, num_labels)

# Call the model once so its weights exist before they are overwritten
# with the saved values.
_ = new_model({
    'input_ids': dummy_input_ids,
    'attention_mask': dummy_attention_mask,
})

new_model.load_weights('filler_s_transformer.h5')

In [39]:
from pathlib import Path

class filler_s_transformer:
    """Inference wrapper around a ``DistilBERTClassifier`` restored from saved weights.

    Relies on the notebook-level names ``DistilBERTClassifier``, ``transformer``,
    ``hidden_units``, ``num_labels`` and ``tokenizer`` being defined before use.
    """

    def __init__(self):
        """Restore the classifier from the default weights file."""
        self.model = self.load_model()

    def load_model(self, path_to_model='filler_s_transformer.h5'):
        """Rebuild the classifier and load its weights from ``path_to_model``.

        Args:
            path_to_model: Location of the saved weights file. Defaults to the
                file name used by the notebook's save-weights cell.

        Returns:
            A ``DistilBERTClassifier`` with the saved weights loaded.

        Raises:
            FileNotFoundError: If ``path_to_model`` is not an existing file.
                Previously this case silently returned ``None`` and only
                failed later, inside ``predict``.
        """
        if not Path(path_to_model).is_file():
            raise FileNotFoundError(
                f"Model weights file not found: {path_to_model!r}"
            )

        temp_model = DistilBERTClassifier(transformer, hidden_units, num_labels)

        # Run one dummy batch through the model so its variables are created
        # before the saved values are loaded into them.
        dummy_input_ids = tf.ones((1, 10), dtype=tf.int32)
        dummy_attention_mask = tf.ones((1, 10), dtype=tf.int32)
        temp_model({'input_ids': dummy_input_ids, 'attention_mask': dummy_attention_mask})

        temp_model.load_weights(path_to_model)
        return temp_model

    def predict(self, data):
        """Tokenize ``data`` and return the classifier's softmax scores.

        Args:
            data: Text passed straight to the tokenizer (the notebook calls
                this with a single string).

        Returns:
            The result of the wrapped model's ``predict`` call.
        """
        ready_input = tokenizer(data, truncation=True, padding=True, return_tensors='tf')
        return self.model.predict({
            'input_ids': ready_input['input_ids'],
            'attention_mask': ready_input['attention_mask'],
        })

# Build the inference wrapper; its constructor calls load_model() for the saved weights file.
fillers_model = filler_s_transformer()

In [40]:
# Score one sentence; the displayed array has one probability per intent class.
fillers_model.predict('what you say?')



array([[0.04758742, 0.04607378, 0.02652416, 0.6309597 , 0.00948749,
        0.00952466, 0.22984281]], dtype=float32)