<a href="https://colab.research.google.com/github/wbigger/-2020-21-webdesign-esercitazione/blob/main/sentiment_analysis_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Load dataset

In [4]:
from google.colab import userdata
# Read the account name from the Colab secret called "gh_username" so the
# notebook does not hard-code any particular user, then echo it as a check.
username = userdata.get("gh_username")
print(username)

wbigger


In [None]:
!pip install transformers datasets

In [5]:
from datasets import load_dataset

# The dataset repository is looked up under the account stored in `username`;
# to load your own copy, change the `gh_username` Colab secret rather than this code.
dataset_repo_id = f"{username}/sentiment-analysis-test"
dataset = load_dataset(dataset_repo_id)




# Tokenize

In [None]:
# Add label column to dataset

# Integer class id for each sentiment string found in the "sentiment" column.
label2id = {"negative": 0, "neutral": 1, "positive": 2}

def add_label_column(examples):
    """Add an integer ``label`` entry derived from the ``sentiment`` entry.

    Args:
        examples: A single example mapping (not a batch) containing a
            ``"sentiment"`` value naming one of the keys of ``label2id``.

    Returns:
        The same mapping, with ``examples["label"]`` set to the class id.
        The original ``"sentiment"`` value is left untouched.

    Raises:
        KeyError: If the sentiment is not one of the known class names.
    """
    sentiment = examples["sentiment"]
    # Normalise only for the lookup, so that e.g. " Positive " still maps to 2
    # instead of aborting the whole map() with a bare KeyError.
    key = sentiment.strip().lower() if isinstance(sentiment, str) else sentiment
    if key not in label2id:
        raise KeyError(
            f"Unknown sentiment {sentiment!r}; expected one of {sorted(label2id)}"
        )
    examples["label"] = label2id[key]
    return examples

# Run the label mapping over each row, then print the resulting dataset
# summary as a quick sanity check that the new column is present.
dataset = dataset.map(function=add_label_column)
print(dataset)


In [None]:
from transformers import AutoTokenizer, DataCollatorWithPadding

# Tokenizer belonging to the checkpoint that is fine-tuned later on.
tokenizer = AutoTokenizer.from_pretrained(
    "cardiffnlp/xlm-roberta-base-tweet-sentiment-it"
)

def tokenize_function(examples):
    """Tokenize the ``"text"`` entry of ``examples``.

    Truncation is enabled with a maximum length of 128; no padding is requested
    here.

    Args:
        examples: Mapping with a ``"text"`` entry to tokenize.

    Returns:
        Whatever the tokenizer call returns for those texts.
    """
    texts = examples["text"]
    return tokenizer(texts, truncation=True, max_length=128)

# Collator built from the same tokenizer; tokenization above does not pad,
# so padding is left to this collator when batches are assembled.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Tokenize in batches and print the result to check the added columns.
tokenized_dataset = dataset.map(tokenize_function, batched=True)
print(tokenized_dataset)


# Fine-tuning a pre-trained model

In [None]:
from transformers import AutoModelForSequenceClassification

# Pre-trained checkpoint that is fine-tuned on the labelled dataset.
checkpoint = "cardiffnlp/xlm-roberta-base-tweet-sentiment-it"

# Hand the model the exact class-name <-> id mapping used to build the "label"
# column, instead of relying on whatever mapping the checkpoint's config holds.
# This keeps the label names reported at inference time consistent with the
# ids the model was trained on.
id2label = {class_id: name for name, class_id in label2id.items()}
model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=len(label2id),
    id2label=id2label,
    label2id=label2id,
)

In [None]:
# Create training parameters

from transformers import TrainingArguments

# Only the output directory is set; every other training option keeps the
# library default.
training_args = TrainingArguments(output_dir="sentiment-analysis-test")



In [None]:
# Create trainer

from transformers import Trainer

# Wire the model, the training arguments, the tokenized train/test splits and
# the padding collator into a single Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
)

In [None]:
# Train the model
# Runs fine-tuning with the Trainer built in the previous cell. The call is
# left as the cell's last expression so the notebook displays its return value.

trainer.train()

In [None]:
# Push the new model to the hub
# NOTE(review): presumably requires being logged in to the Hugging Face Hub with
# a write token, and publishes under the name passed to TrainingArguments
# ("sentiment-analysis-test") in the logged-in account — confirm before running.
trainer.push_to_hub()


# Evaluate!

In [None]:
from transformers import pipeline, AutoModelForSequenceClassification

# Baseline: run the original (not fine-tuned) checkpoint on two sample prompts
# so its answers can be compared with the fine-tuned model's.
sentiment_pipeline_orig = pipeline(
    "sentiment-analysis",
    model="cardiffnlp/xlm-roberta-base-tweet-sentiment-it",
)

for prompt in ("Personale docente", "Interrogazioni a sorpresa"):
    print(f"Sentiment for prompt {prompt}:")
    print(sentiment_pipeline_orig(prompt))

In [None]:
from transformers import pipeline, AutoModelForSequenceClassification

# Fine-tuned model: load it from the account's repository and run the same two
# sample prompts used for the original checkpoint.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model=f"{username}/sentiment-analysis-test",
)

for prompt in ("Personale docente", "Interrogazioni a sorpresa"):
    print(f"Sentiment for prompt {prompt}:")
    print(sentiment_pipeline(prompt))

# Gradio interface

In [None]:
!pip install gradio

In [None]:
import gradio as gr

# Load the fine-tuned model for the Gradio demo (this cell only loads it; the
# predictions happen inside analyze_sentiment).
sentiment_pipeline = pipeline(
    task="sentiment-analysis",
    model=f"{username}/sentiment-analysis-test",
)

def analyze_sentiment(text):
    """Classify ``text`` and format the outcome as a one-line string.

    Args:
        text: The prompt to pass to ``sentiment_pipeline``.

    Returns:
        A string of the form ``"Label: <label>, Score: <score>"`` built from
        the first entry of the pipeline's result.
    """
    first = sentiment_pipeline(text)[0]
    label, score = first["label"], first["score"]
    return f"Label: {label}, Score: {score}"

# Input and output widgets of the demo, built first so the Interface call
# below stays short.
prompt_box = gr.Textbox(
    label="Prompt",
    lines=2,
    placeholder="Scrivi qui qualcosa sulla tua scuola...",
)
result_box = gr.Textbox(label="Sentiment Analysis Result")

# Web UI that sends the typed prompt to analyze_sentiment and shows its string.
iface = gr.Interface(
    fn=analyze_sentiment,
    inputs=prompt_box,
    outputs=result_box,
    title="Sentiment Analysis for VIAN",
    description="Analizza i sentimenti riguardo alla tua scuola con un modello fine-tuned",
)

iface.launch()
