In [10]:
! pip install transformers


Collecting transformers
  Downloading transformers-4.37.0-py3-none-any.whl.metadata (129 kB)
     ---------------------------------------- 0.0/129.4 kB ? eta -:--:--
     --- ------------------------------------ 10.2/129.4 kB ? eta -:--:--
     --- ------------------------------------ 10.2/129.4 kB ? eta -:--:--
     ----- ------------------------------- 20.5/129.4 kB 108.9 kB/s eta 0:00:01
     ----- ------------------------------- 20.5/129.4 kB 108.9 kB/s eta 0:00:01
     -------- ---------------------------- 30.7/129.4 kB 119.1 kB/s eta 0:00:01
     ----------- ------------------------- 41.0/129.4 kB 130.7 kB/s eta 0:00:01
     -------------- ---------------------- 51.2/129.4 kB 145.8 kB/s eta 0:00:01
     ----------------- ------------------- 61.4/129.4 kB 148.8 kB/s eta 0:00:01
     -------------------- ---------------- 71.7/129.4 kB 163.8 kB/s eta 0:00:01
     -------------------------- ---------- 92.2/129.4 kB 194.1 kB/s eta 0:00:01
     -------------------------- ---------- 92.

# english language on sentiment analysis

In [45]:
from transformers import pipeline #pipeline helps to call the pre-trained model
classifier = pipeline('sentiment-analysis') #use-case

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [62]:
classifier('We are very happy to show you the 🤗 Transformers library.') #here i am giving the a positive statement to the classifier

[{'label': '5 stars', 'score': 0.7725352048873901}]

In [61]:
classifier('The pizza is not that great but the crust is awesome') #a negative but also a appreciating about +ve thing

[{'label': '4 stars', 'score': 0.46868324279785156}]

In [60]:
results = classifier(["We are very happy to show you the 🤗 Transformers library.",
           "We hope you don't hate it."])

for result in results:
    print(f"label of statement : {result['label']}, with having score: {round(result['score'], 4)}")

label of statement : 5 stars, with having score: 0.7725
label of statement : 5 stars, with having score: 0.2365


#above statements gives the score with range of 0-1 in which the 0.5 refers neutral 0.8-0.9 refers sure and 0.1-0.3 refers the less chances to be respective statement for sentiment analysis of "dislibert-base-uncased-finetuned-sst-2-english" model

# french-language on sentiment analysis

In [17]:
classifier = pipeline('sentiment-analysis', model="nlptown/bert-base-multilingual-uncased-sentiment")

In [59]:
classifier("Esperamos que no lo odie.")

[{'label': '3 stars', 'score': 0.33688196539878845}]

In [56]:
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

In [57]:
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
# This model only exists in PyTorch, so we use the `from_pt` flag to import that model in TensorFlow.
model = TFAutoModelForSequenceClassification.from_pretrained(model_name, from_pt=True)
tokenizer = AutoTokenizer.from_pretrained(model_name)
classifier = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)

All PyTorch model weights were used when initializing TFBertForSequenceClassification.

All the weights of TFBertForSequenceClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.


In [58]:
classifier("I am a good boy")

[{'label': '4 stars', 'score': 0.42292696237564087}]

In [64]:
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
tf_model = TFAutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

All PyTorch model weights were used when initializing TFDistilBertForSequenceClassification.

All the weights of TFDistilBertForSequenceClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.


In [65]:
inputs = tokenizer("We are very happy to show you the 🤗 Transformers library.")

In [66]:
print(inputs)

{'input_ids': [101, 2057, 2024, 2200, 3407, 2000, 2265, 2017, 1996, 100, 19081, 3075, 1012, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}


In [71]:
#making the batch of the all ids
tf_batch = tokenizer(
    ["We are very happy to show you the 🤗 Transformers library.", "We hope you don't hate it."],
    padding=True,
    truncation=True,
    max_length=512,
    return_tensors="tf"
)

In [72]:
for key, value in tf_batch.items():
    print(f"{key}: {value.numpy().tolist()}")

input_ids: [[101, 2057, 2024, 2200, 3407, 2000, 2265, 2017, 1996, 100, 19081, 3075, 1012, 102], [101, 2057, 3246, 2017, 2123, 1005, 1056, 5223, 2009, 1012, 102, 0, 0, 0]]
attention_mask: [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0]]


In [73]:
tf_outputs = tf_model(tf_batch)

In [74]:
print(tf_outputs)

TFSequenceClassifierOutput(loss=None, logits=<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[-4.0832963 ,  4.3364143 ],
       [ 0.08180879, -0.04178449]], dtype=float32)>, hidden_states=None, attentions=None)


In [77]:
import tensorflow as tf
tf_predictions = tf.nn.softmax(tf_outputs[0], axis=-1)

In [78]:
print(tf_predictions)

tf.Tensor(
[[2.2042994e-04 9.9977952e-01]
 [5.3085905e-01 4.6914092e-01]], shape=(2, 2), dtype=float32)


In [79]:
import tensorflow as tf
tf_outputs = tf_model(tf_batch, labels = tf.constant([1, 0]))

In [81]:
# Define the save directory
save_directory = "C://Users/vinay/Desktop/virtualenvs/fine-tuning/save_directory/"
# Save the tokenizer
tokenizer.save_pretrained(save_directory)

# Save the model
model.save_pretrained(save_directory)


In [38]:
from transformers import AutoTokenizer, TFAutoModel
tokenizer = AutoTokenizer.from_pretrained(save_directory)
TFAutoModelmodel = TFAutoModel.from_pretrained(save_directory, from_pt=True)


UnpicklingError: Weights only load failed. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution.Do it only if you get the file from a trusted source. WeightsUnpickler error: Unsupported operand 72

In [39]:
tf_outputs = tf_model(tf_batch, output_hidden_states=True, output_attentions=True)
all_hidden_states, all_attentions = tf_outputs[-2:]

In [40]:
from transformers import DistilBertTokenizer, TFDistilBertForSequenceClassification
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
model = TFDistilBertForSequenceClassification.from_pretrained(model_name)
tokenizer = DistilBertTokenizer.from_pretrained(model_name)

All PyTorch model weights were used when initializing TFDistilBertForSequenceClassification.

All the weights of TFDistilBertForSequenceClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.


In [41]:
from transformers import DistilBertConfig, DistilBertTokenizer, TFDistilBertForSequenceClassification
config = DistilBertConfig(n_heads=8, dim=512, hidden_dim=4*512)
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model = TFDistilBertForSequenceClassification(config)

In [42]:
from transformers import DistilBertConfig, DistilBertTokenizer, TFDistilBertForSequenceClassification
model_name = "distilbert-base-uncased"
model = TFDistilBertForSequenceClassification.from_pretrained(model_name, num_labels=10)
tokenizer = DistilBertTokenizer.from_pretrained(model_name)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertForSequenceClassification: ['vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_projector.bias']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFDistilBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']
You should 