In [None]:
# Install sacrebleu if not already installed
!pip install sacrebleu

Collecting sacrebleu
  Downloading sacrebleu-2.5.1-py3-none-any.whl.metadata (51 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/51.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting portalocker (from sacrebleu)
  Downloading portalocker-3.2.0-py3-none-any.whl.metadata (8.7 kB)
Collecting colorama (from sacrebleu)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Downloading sacrebleu-2.5.1-py3-none-any.whl (104 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.1/104.1 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Downloading portalocker-3.2.0-py3-none-any.whl (22 kB)
Installing collected packages: portalocker, colorama, sacrebleu
Successfully installed colorama-0.4.6 portalocker-3.2.0 sacrebleu-2.5.1


In [None]:
# Import necessary packages
import pandas as pd
import torch

# NOTE: this is the transformers logging helper, not the stdlib `logging` module;
# after this import the bare name `logging` refers to the transformers one.
from transformers import logging
# Set the transformers log level to WARNING so lower-severity messages are not printed.
logging.set_verbosity(logging.WARNING)

In [None]:
import os
import csv
import io
import pandas as pd

In [None]:
from sklearn.metrics import accuracy_score, f1_score
import sacrebleu

# Input files, resolved relative to the notebook's working directory.
# Alternative locations: "/mnt/data/car_reviews.csv" and
# "/mnt/data/reference_translations.txt".
CSV_PATH = "car_reviews.csv"
REFS_PATH = "reference_translations.txt"

import torch

# Device index handed to every pipeline below as `device=`:
# 0 when CUDA is available (reported as GPU), otherwise -1 (reported as CPU).
device = -1
if torch.cuda.is_available():
    device = 0
print(f"Using device: {'GPU' if device==0 else 'CPU'}")

Using device: GPU


In [None]:
# --- Load data ----------------------------------------------------------------
# Fail fast with an actionable message when either input file is missing.
if not os.path.exists(CSV_PATH):
    raise FileNotFoundError(f"Missing {CSV_PATH}. Put it next to this notebook or fix the path.")
if not os.path.exists(REFS_PATH):
    raise FileNotFoundError(f"Missing {REFS_PATH}. Put it next to this notebook or fix the path.")

# Read the file once; the same text feeds both the delimiter sniffer and pandas.
with open(CSV_PATH, "r", encoding="utf-8") as f:
    raw = f.read()
if not raw.strip():
    # An empty file would otherwise surface as an opaque sniffer/parser error.
    raise ValueError(f"{CSV_PATH} has no rows.")

# Robust CSV read (auto-detect delimiter).
# The sniffer is limited to common delimiters so punctuation inside the review
# text cannot be picked, and falls back to a comma when it cannot decide
# (e.g. a single-column file) instead of raising csv.Error.
sniffer = csv.Sniffer()
try:
    dialect = sniffer.sniff("\n".join(raw.splitlines()[:2]), delimiters=",;\t|")
    sep = dialect.delimiter
except csv.Error:
    sep = ","
df = pd.read_csv(io.StringIO(raw), sep=sep, engine="python")

if len(df) == 0:
    raise ValueError(f"{CSV_PATH} has no rows.")
# Only the first five reviews are used by the rest of the notebook.
df = df.head(5).copy()

# Try to detect the text & label columns
def detect_text_column(df: pd.DataFrame) -> str:
    """Return the name of the column that holds the review text.

    Well-known column names are tried first, in priority order. If none is
    present, the first column with a string-like dtype is used. Both the
    classic ``object`` dtype and pandas' dedicated string dtypes count as
    string-like, so the fallback does not depend on how pandas chose to
    store text.

    Raises:
        ValueError: if no known name matches and no string-like column exists.
    """
    for candidate in ("Review", "review", "text", "Text", "content", "Content"):
        if candidate in df.columns:
            return candidate

    def _is_textual(dtype) -> bool:
        # `== "object"` alone misses pandas string dtypes.
        return dtype == "object" or pd.api.types.is_string_dtype(dtype)

    text_like = [c for c in df.columns if _is_textual(df[c].dtype)]
    if not text_like:
        raise ValueError("No string-like column found for review text.")
    return text_like[0]

def detect_label_column(df: pd.DataFrame):
    """Return the first known label column present in `df`, or None if there is none.

    Candidate names are checked in a fixed priority order.
    """
    known_names = ("Class", "label", "sentiment", "Label", "Sentiment")
    return next((name for name in known_names if name in df.columns), None)

# Resolve which columns hold the review text and (optionally) the gold label.
text_col = detect_text_column(df)
label_col = detect_label_column(df)

# Reviews as plain Python strings, in row order.
texts = df[text_col].astype(str).tolist()

# Gold labels as 0/1, or None when the file has no label column.
# "1", "POSITIVE" and "POS" (case-insensitive, surrounding whitespace ignored)
# map to 1; every other value maps to 0.
if label_col is None:
    y_true = None
else:
    y_true = [
        int(str(raw_label).strip().upper() in {"1", "POSITIVE", "POS"})
        for raw_label in df[label_col].tolist()
    ]

In [None]:
# --- 1) Sentiment classification ----------------------------------------------
from transformers import pipeline

# Sentiment classifier, run on the device selected earlier.
clf = pipeline(
    "text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    device=device,
)
# truncation=True is forwarded with the call so over-long reviews are cut rather than rejected.
pred_outputs = clf(texts, truncation=True)

# Keep the raw label string of each prediction ("" if an output has no label key).
predicted_labels = []
for output in pred_outputs:
    predicted_labels.append(output.get("label", ""))

# Map to {0,1}
def label_to_bin(lbl: str) -> int:
    """Map a predicted label string to 1 (positive) or 0 (anything else).

    The check is case-insensitive and matches any label containing "POS";
    an empty or missing label maps to 0.
    """
    normalized = lbl.upper() if lbl else ""
    return int("POS" in normalized)

# Binary predictions aligned with `predicted_labels`.
predictions = list(map(label_to_bin, predicted_labels))

# Metrics are only defined when gold labels were found; otherwise both stay None.
if y_true is None:
    accuracy_result = None
    f1_result = None
else:
    accuracy_result = float(accuracy_score(y_true, predictions))
    f1_result = float(f1_score(y_true, predictions, average="binary"))

print("Sentiment done.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cuda:0


Sentiment done.


In [None]:
# --- 2) EN→ES translation (first two sentences of review #1) + BLEU -----------
import re

def first_two_sentences(text: str) -> str:
    """Return the first two sentences of `text`, joined by a single space.

    A sentence boundary is whitespace that follows '.', '!' or '?'.
    Empty or missing input yields an empty string.
    """
    cleaned = (text or "").strip()
    # Two splits are enough: anything after the second boundary is discarded anyway.
    pieces = re.split(r"(?<=[.!?])\s+", cleaned, maxsplit=2)
    return " ".join(pieces[:2]).strip()

# Translate only the opening of the first review.
snippet = first_two_sentences(texts[0])

translator = pipeline(
    task="translation_en_to_es",
    model="Helsinki-NLP/opus-mt-en-es",
    device=device
)
translated_review = translator(snippet, max_length=400)[0]["translation_text"]

# Every non-empty line of the reference file is an alternative reference
# translation of the same snippet.
with open(REFS_PATH, "r", encoding="utf-8") as f:
    refs = [ln.strip() for ln in f if ln.strip()]

# sacrebleu expects a list of hypotheses plus a list of reference *streams*,
# where each stream has exactly one entry per hypothesis. With a single
# hypothesis and N alternative references that means N one-element streams,
# i.e. [[ref_1], [ref_2], ...] -- not one stream holding all N lines.
# If the file has no usable lines, fall back to the source snippet so the cell
# still yields a number (that score is not a meaningful quality measure).
reference_streams = [[ref] for ref in refs] if refs else [[snippet]]
bleu = sacrebleu.corpus_bleu([translated_review], reference_streams)
bleu_score = float(bleu.score)
print("Translation + BLEU done.")

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/312M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/312M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/826k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


Translation + BLEU done.


In [None]:
# --- 3) Extractive QA on review #2 --------------------------------------------
question = "What did he like about the brand?"
# Use the second review when there is one; with a single review, use that one.
context = texts[min(1, len(texts) - 1)]

qa = pipeline(
    "question-answering",
    model="deepset/minilm-uncased-squad2",
    device=device,
)
qa_output = qa(question=question, context=context)
# Empty string when the pipeline result carries no "answer" key.
answer = qa_output.get("answer", "")
print("QA done.")

# --- 4) Summarize the last review (~50–55 tokens) -----------------------------
summarizer = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
    device=device,
)
# Length bounds are counted in model tokens (subword-ish), so they only
# approximate the ~50–55 token target.
length_bounds = {"min_length": 45, "max_length": 60}
# do_sample=False is passed explicitly so generation does not sample.
summ = summarizer(texts[-1], do_sample=False, **length_bounds)
summarized_text = summ[0]["summary_text"]
print("Summarization done.")

config.json:   0%|          | 0.00/477 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

Some weights of the model checkpoint at deepset/minilm-uncased-squad2 were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/107 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Device set to use cuda:0


QA done.


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

Device set to use cuda:0


Summarization done.


In [None]:
# --- Save & print --------------------------------------------------------------
import json

# Collect every artefact produced above into one JSON-serialisable mapping.
results = dict(
    predicted_labels=predicted_labels,
    predictions=predictions,
    accuracy_result=accuracy_result,
    f1_result=f1_result,
    translated_review=translated_review,
    bleu_score=bleu_score,
    question=question,
    context=context,
    answer=answer,
    summarized_text=summarized_text,
    text_column_used=text_col,
    label_column_used=label_col,
)

# Serialise once (non-ASCII characters kept as-is) and reuse the text for
# both the file on disk and the on-screen report.
rendered = json.dumps(results, ensure_ascii=False, indent=2)
with open("llm_results.json", "w", encoding="utf-8") as f:
    f.write(rendered)

print("\n=== RESULTS ===")
print(rendered)
print("\nSaved -> llm_results.json")


=== RESULTS ===
{
  "predicted_labels": [
    "POSITIVE",
    "POSITIVE",
    "POSITIVE",
    "NEGATIVE",
    "POSITIVE"
  ],
  "predictions": [
    1,
    1,
    1,
    0,
    1
  ],
  "accuracy_result": 0.8,
  "f1_result": 0.8571428571428571,
  "translated_review": "Estoy muy satisfecho con mi Nissan NV SL 2014. Uso esta camioneta para mis entregas de negocios y uso personal.",
  "bleu_score": 68.88074582865504,
  "question": "What did he like about the brand?",
  "context": "The car is fine. It's a bit loud and not very powerful. On one hand, compared to its peers, the interior is well-built. The transmission failed a few years ago, and the dealer replaced it under warranty with no issues. Now, about 60k miles later, the transmission is failing again. It sounds like a truck, and the issues are well-documented. The dealer tells me it is normal, refusing to do anything to resolve the issue. After owning the car for 4 years, there are many other vehicles I would purchase over this one

## Process & Findings

### 1. Sentiment Classification
- **Task**: Classify 5 car reviews into positive/negative sentiment.  
- **Model Used**: `distilbert-base-uncased-finetuned-sst-2-english`  
- **Predicted Labels**: `["POSITIVE", "POSITIVE", "POSITIVE", "NEGATIVE", "POSITIVE"]`
- **Binary Predictions**: `[1, 1, 1, 0, 1]`  
- **Results**:  
  - Accuracy: **0.80**  
  - F1 Score: **0.857**  

*Insight*: The model correctly captured most customer sentiments, though one misclassification impacted accuracy.

---

### 2. Translation (EN → ES)
- **Task**: Translate the first two sentences of the first review into Spanish.  
- **Model Used**: `Helsinki-NLP/opus-mt-en-es`  
- **Generated Translation**:  
> *"Estoy muy satisfecho con mi Nissan NV SL 2014. Uso esta camioneta para mis entregas de negocios y uso personal."*

- **Reference Translations**: loaded from `reference_translations.txt`  
- **BLEU Score**: **68.88**

*Insight*: The translation was fluent and aligned well with provided references, yielding a strong BLEU score.

---

### 3. Extractive Question Answering
- **Context**: Second review (customer described both positives and negatives).  
- **Question**: *“What did he like about the brand?”*  
- **Answer Extracted**:  
> **"ride quality, reliability"**

*Insight*: The QA model successfully pinpointed the brand aspects valued by the customer despite broader negative context.

---

### 4. Summarization
- **Task**: Summarize the last review into ~50–55 tokens.  
- **Model Used**: `sshleifer/distilbart-cnn-12-6`  
- **Summarized Text**:  
> *"Nissan Rogue provides the desired SUV experience without burdening me with an exorbitant payment. Handling and styling are great; I have hauled 12 bags of mulch in the back with the seats down and could have held more. The engine delivers strong performance, and the ride is really smooth."*

*Insight*: The summary is concise, preserving key details on affordability, performance, and comfort.

---

## Conclusion
Through the use of pre-trained LLMs, I demonstrated that **Car-ing is Sharing** can efficiently:  
- Automate **sentiment insights** from customer reviews,  
- Provide **multilingual support** with quality translations,  
- Enable **question answering** to assist agents,  
- Deliver **summaries** for quick review analysis.  

This pilot validates the potential of LLMs to enhance customer support and internal efficiency.

---

# **Report by Nayab Irfan — AI Engineer**