# Below, I present a simple chatbot app that addresses diverse inquiries using LLMs. It runs fast locally thanks to pre-trained LLMs from Hugging Face.


1. Classify car reviews
2. Translate a car review from English to Spanish
3. Ask a question about a car review
4. Summarize and analyze a car review

In [None]:
# Load the semicolon-separated review file and list its column names
# to get a first look at the available fields.
import pandas as pd

data = pd.read_csv(filepath_or_buffer='car_reviews.csv', sep=';')
column_index = data.columns
print(column_index)

## 1. Sentiment analysis with classification LLM

In [None]:
# Classify the sentiment of every review and score the predictions against
# the labels stored in the 'Class' column.
import pandas as pd
from transformers import logging, pipeline
from sklearn.metrics import accuracy_score, f1_score

# Limit transformers log output to warnings and above.
logging.set_verbosity(logging.WARNING)

# Loading the sentiment analysis pipeline from Hugging Face
model_name_sentiment = "distilbert-base-uncased-finetuned-sst-2-english"
sentiment_pipeline = pipeline("sentiment-analysis", model=model_name_sentiment)
data = pd.read_csv('car_reviews.csv', sep=';')

# Predicting sentiment labels for the reviews.
# truncation=True asks the tokenizer to cut reviews that exceed the model's
# maximum input length, so one long review does not abort the whole batch.
predicted_labels = sentiment_pipeline(data['Review'].tolist(), truncation=True)
print(predicted_labels)

# Converting output to {0,1} format (1 = positive, 0 = anything else).
predictions = [1 if r['label'].upper() == 'POSITIVE' else 0 for r in predicted_labels]
# Labels are stripped before comparison so that stray whitespace in the CSV
# (e.g. " POSITIVE") is not silently counted as negative.
true_labels = [1 if str(label).strip().upper() == 'POSITIVE' else 0 for label in data['Class']]

# Evaluating the classification accuracy and F1 score of predictions
accuracy_result = accuracy_score(true_labels, predictions)
f1_result = f1_score(true_labels, predictions, pos_label=1)

print(f"Predictions (1 is positive, 0 is negative): {predictions}")
print(f"Accuracy: {accuracy_result:.2f}")
print(f"F1 Score: {f1_result:.2f}")


## 2. Translation LLM 
Extract and pass the first two sentences of the first review in the dataset to an English-to-Spanish translation LLM. Calculate the BLEU score to assess translation quality, using the content in reference_translations.txt as references.

In [None]:
# Translate the first two sentences of the first review from English to
# Spanish and score the translation with BLEU.
# Relies on `data` (the review DataFrame) loaded by an earlier cell.
from nltk.translate.bleu_score import sentence_bleu
from transformers import pipeline
import evaluate

# Extracting the first two sentences of the first review.
# .iloc[0] selects by position, so this works whatever the index labels are.
first_review = data['Review'].iloc[0]
# NOTE: sentences are delimited naively on '.', so abbreviations, decimals,
# '!' and '?' are not treated as sentence boundaries.
first_two_sentences = '.'.join(first_review.split('.')[:2]).strip() + '.'

# Loading the English-to-Spanish translation LLM from Hugging Face
model_name_translation = "Helsinki-NLP/opus-mt-en-es"
translator = pipeline("translation_en_to_es", model=model_name_translation)
translated_review = translator(first_two_sentences, clean_up_tokenization_spaces=True)[0]['translation_text']

# Reading the reference translations
with open("reference_translations.txt", encoding='utf-8') as file:
    reference_text = file.read().strip()

# Each non-empty line is treated as a separate reference translation
# (assumes one reference per line -- confirm against the file). With a
# single-line file this is identical to passing the whole text as one
# reference; with several lines it avoids merging them into one long
# reference.
reference_translations = [
    line.strip() for line in reference_text.splitlines() if line.strip()
]
if not reference_translations:
    raise ValueError("reference_translations.txt contains no reference text")

# Evaluating the model with the BLEU score: one prediction, scored against
# its list of references.
bleu = evaluate.load("bleu")
bleu_score = bleu.compute(predictions=[translated_review], references=[reference_translations])

print("BLEU score dictionary:", bleu_score)


## 3. Q&A LLM - asking a question about a review

In [None]:
# Extractive question answering over a single review.
# Relies on `data` (the review DataFrame) loaded by an earlier cell.
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

# The question, and the review it is asked about (the one labelled 1,
# i.e. the second review in the dataset).
question = "What did he like about the brand?"
context_second = data['Review'][1]

# One checkpoint name is used for both the model and its tokenizer.
model_name_qa = "deepset/minilm-uncased-squad2"
nlp = pipeline('question-answering', tokenizer=model_name_qa, model=model_name_qa)

# The pipeline takes the question/context pair as a single dict.
QA_input = dict(question=question, context=context_second)

answer = nlp(QA_input)
print(answer)

## 4. Summarizing LLM
Summarize the last review in the dataset, keeping the summary between 50 and 55 tokens.

In [None]:
# Build the Hugging Face summarization pipeline and pick the final review.
# Relies on `pipeline` and `data` defined by earlier cells.
summarizer = pipeline(model="facebook/bart-large-cnn", task="summarization")
last_review = data['Review'].iloc[-1]

# Length bounds passed to the summarizer: at least 50, at most 55.
length_limits = {'min_length': 50, 'max_length': 55}
summary_output = summarizer(last_review, **length_limits)

# The pipeline returns a list of dicts; the text sits under 'summary_text'.
summarized_text = summary_output[0]['summary_text']
print(summarized_text)