# Step 0: Importing Required Packages



In [2]:
import pandas as pd
from scipy import stats
from transformers import pipeline

# Text Classification


In [None]:
# No checkpoint is named here, so the choice of model is left to `pipeline`.
textclassifier = pipeline(task="text-classification")

# Classify one short review and show the raw pipeline output.
sample_review = "The movie was good"
prediction = textclassifier(sample_review)
print(prediction)

# Challenge: Confuse the model! Find an input that makes the model produce a score (confidence) below 0.6.

# Token Classification

In [None]:
# Token-level tagger; the model is whatever `pipeline` selects for this task.
classifier = pipeline(task="token-classification")

# Print the per-token predictions for a sentence containing a name and a place.
ner_input = "Hello I'm Omar and I live in Zürich."
print(classifier(ner_input))

# Challenge: Searching or Trying? Find 5 more entity types (e.g., "I-PER", "I-LOC", etc.)

# Fill Mask

In [None]:
# Rebinds `classifier` to a fill-mask pipeline (task passed by keyword, as in
# the other cells).
classifier = pipeline(task="fill-mask")

# The "<mask>" placeholder marks the position the model should fill in.
masked_sentence = "Paris is the <mask> of France."
print(classifier(masked_sentence))

# Challenge: Undercover: try increasing the number of masked words to see how the model behaves

# Table Question Answering

In [None]:
# Question answering over a small in-memory table.
tqa = pipeline(task="table-question-answering")

# Every cell, including the movie counts, is stored as a string.
data = {
    "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
    "Number of movies": ["87", "53", "69"],
}
table = pd.DataFrame(data)

question = "how many movies does Leonardo Di Caprio have?"
answer = tqa(table=table, query=question)

# Show only the first entry of the "cells" field of the result.
print(answer["cells"][0])

# Challenge: Predictable model! How many different answers might we see?

# Question Answering

In [None]:
# Question answering over a free-text context: the same facts as the table
# example, written as a sentence.
qa = pipeline(task="question-answering")

question = "how many movies does Leonardo Di Caprio have?"
context = "Brad Pitt has 87, Leonardo Di Caprio has 53, and George Clooney has 69 movies."

print(qa(question=question, context=context))

# Challenge: Let's take a deeper look! See the architecture of the model by calling .model.config

# Zero-Shot Classification

In [None]:
# Zero-shot classification: the candidate labels are supplied at call time.
zsc = pipeline(task="zero-shot-classification")

candidate_topics = ["CINEMA", "MUSIC", "ART"]
print(zsc("Inception is the best movie ever", candidate_labels=candidate_topics))

# Challenge: Unique English words! What is the vocab_size of the Tokenizer used by zsc?

# Translation

In [None]:
# English-to-French translation pipeline.
en_fr_translator = pipeline(task="translation_en_to_fr")

# Left as a bare expression so the notebook displays the result.
english_sentence = "How far is the closest city?"
en_fr_translator(english_sentence)

# Challenge: Multilinguality! How many languages does this task support?

# Summarization

In [None]:
# Summarization pipeline applied to a two-sentence passage about Paris.
summarizer = pipeline(task="summarization")

# Adjacent string literals are concatenated into one unbroken input string.
paris_passage = (
    "Paris is the capital and most populous city of France, with an estimated "
    "population of 2,175,601 residents as of 2018, in an area of more than 105 "
    "square kilometres (41 square miles). The City of Paris is the centre and "
    "seat of government of the region and province of Île-de-France, or Paris "
    "Region, which has an estimated population of 12,174,880, or about 18 "
    "percent of the population of France as of 2017."
)

# Left as a bare expression so the notebook displays the result.
summarizer(paris_passage)

# Challenge: SHORTEEERRR! Force the model to keep the summary under 10 words.

# Text Generation

In [None]:
# Text generation: request two continuations of the same prompt.
generator = pipeline(task="text-generation")

# Left as a bare expression so the notebook displays the result.
prompt = "Hello, I'm a student at"
generator(prompt, num_return_sequences=2)

# Challenge: Lullaby! Force the model to tell you a single long story.

# Feature Extraction

In [None]:
# Feature extraction on the PyTorch ("pt") backend.
feature_extractor = pipeline(task="feature-extraction", framework="pt")
text = "Transformers is an awesome library!"

# Take the first item of the returned tensor, then average over its leading
# axis. Presumably that axis indexes tokens, giving one vector per sentence
# (TODO confirm the output shape).
first_item = feature_extractor(text, return_tensors="pt")[0]
first_item.numpy().mean(axis=0)

# Challenge: Does framework matter? pt stands for PyTorch. Will we get the same output if we use TensorFlow?

# Sentence Similarity

In [None]:
from sentence_transformers import SentenceTransformer

# Sentence similarity uses the sentence-transformers library directly rather
# than a `transformers` pipeline.
sentences = [
    "Competition day is next week",
    "Mastering this will greatly help",
]
model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode the sentences, then compare the embeddings against themselves to get
# the pairwise similarities.
embeddings = model.encode(sentences)
similarities = model.similarity(embeddings, embeddings)
print(similarities)


# Challenges:
    # What was different for this task? Try pipeline('sentence-similarity')
    # Try to add more sentences
    # what is the vector size (embedding dimension)?
    # Try words instead of sentences