In [1]:
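# Uncomment on a fresh environment (e.g. Colab). `%pip` installs into the running kernel's environment.
# %pip install pandas tqdm torch transformers scikit-learn spacy
# !python -m spacy download en_core_web_sm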

In [2]:
# Import necessary libraries
import pandas as pd
from pathlib import Path
from tqdm.auto import tqdm
import torch
import re
import gc
import spacy # For sentence splitting
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import os # For ensuring directory creation if needed

# Widen DataFrame cell rendering so up to 200 characters of text are visible per cell
pd.options.display.max_colwidth = 200

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# --- Configuration ---
# Assistants to process. Trim the list (e.g. ['alexa']) to run a single assistant.
ASSISTANT_NAMES = ['alexa', 'google']

# ABSA model checkpoint (selected in the pilot study) and the
# maximum sequence length used with that model.
MODEL_DEBERTA_ABSA = "yangheng/deberta-v3-base-absa-v1.1"
MAX_SEQ_LENGTH = 512

# --- Path Setup ---
# If running in Colab, mount Google Drive and set THESIS_ROOT explicitly instead, e.g.:
# from google.colab import drive
# drive.mount('/content/drive')
# THESIS_ROOT = Path("/content/drive/MyDrive/your_thesis_folder/") # ADJUST THIS

# Locate the thesis root, i.e. the folder that contains 'results'.
# Resolution order:
#   1. the parent of the working directory (notebook sits in a subfolder such as notebooks/),
#   2. the working directory itself (notebook sits directly in the thesis root),
#   3. the relative path "./" if the filesystem lookups themselves fail.
try:
    NOTEBOOK_DIR = Path.cwd()
    THESIS_ROOT = NOTEBOOK_DIR.parent
    if not (THESIS_ROOT / "results").exists(): # Basic check
        print(f"Attempted THESIS_ROOT: {THESIS_ROOT} but results folder not found. Trying current dir.")
        THESIS_ROOT = NOTEBOOK_DIR # Fallback to current directory if 'results' isn't in parent
except OSError:
    # Only filesystem failures (e.g. the working directory was removed, or a permission
    # error while probing) are expected here. A bare `except:` would also hide
    # KeyboardInterrupt and genuine programming errors.
    THESIS_ROOT = Path("./")

print(f"Using THESIS_ROOT: {THESIS_ROOT.resolve()}")

input_dir = THESIS_ROOT / "results"
output_dir = input_dir / "absa_full_results"
if not input_dir.is_dir():
    # mkdir(parents=True) below would create an empty 'results' folder without complaint,
    # which hides a wrong THESIS_ROOT guess until a later cell fails to find its input.
    print(f"WARNING: input directory {input_dir.resolve()} does not exist yet; check THESIS_ROOT.")
output_dir.mkdir(parents=True, exist_ok=True)
print(f"Input directory: {input_dir.resolve()}")
print(f"Output directory for ABSA results: {output_dir.resolve()}")


# --- Device Setup ---
# Use the first CUDA GPU when torch reports one as available; otherwise fall back to CPU.
# DEVICE_NAME_INFO is bound on both branches so any later reference to it
# cannot raise NameError on a CPU-only machine.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
    DEVICE_NAME_INFO = torch.cuda.get_device_name(0)
    print(f"Using GPU: {DEVICE_NAME_INFO}")
else:
    DEVICE = torch.device("cpu")
    DEVICE_NAME_INFO = "CPU"
    print("Using CPU. Processing will be slower.")


# --- Aspect Taxonomy ---
# Maps each aspect category name to its list of lowercase keywords / short phrases.
# A keyword may deliberately appear under several categories (e.g. "command", "smart"),
# but each list holds every keyword at most once: the repeated "accent" (Voice Recognition)
# and "answer" (Knowledge Base) entries were removed.
# NOTE(review): "alexa" is listed under Voice Recognition; if keywords are matched against
# review text, mentions of the assistant's name may all map to that aspect — confirm intended.
taxonomy = {
    "Functionality & Performance": [
        "command", "task", "function", "request", "execute", "perform", "play", "control",
        "music", "timer", "alarm", "respond", "slow", "fast", "quick", "accurate", "ability",
        "capability", "feature", "work", "operation", "answer", "weather", "news", "skill",
        "search", "query", "song", "playlist", "speed", "performance", "reliable", "inconsistent",
        "consistent", "accomplish", "smart", "intelligence", "stupid", "dumb", "basic"
    ],
    "Voice Recognition": [
        "hear", "listen", "recognize", "understanding", "mic", "voice", "accent", "speech",
        "microphone", "wake", "alexa", "hey google", "ok google", "command", "activation",
        "trigger", "phrase", "call", "name", "hear me", "misheard", "mishear", "understand",
        "detection", "sensitivity", "pronunciation", "dialect", "language", "recognition"
    ],
    "Knowledge Base": [
        "answer", "knowledge", "info", "response", "fact", "question", "data", "correct",
        "wrong", "information", "knowing", "research", "source", "accurate", "inaccurate",
        "encyclopedia", "intelligence", "smart", "learn", "education", "informed", "wisdom",
        "trivia", "facts", "content", "query", "request", "respond"
    ],
    "Integration & Ecosystem": [
        "integrate", "connect", "compatible", "device", "home", "nest", "smart home", "ecosystem",
        "philips", "hue", "lights", "thermostat", "tv", "television", "speaker", "app", "phone",
        "smartphone", "skill", "third-party", "partner", "service", "platform", "sync",
        "connection", "pair", "bluetooth", "wifi", "wireless", "smart", "bulb", "plug", "switch",
        "camera", "doorbell", "lock", "appliance", "interoperability", "echo", "home mini"
    ],
    "Usability & Interface": [
        "setup", "interface", "easy", "use", "design", "confusing", "intuitive", "simple",
        "complicated", "difficult", "user-friendly", "accessibility", "accessible", "learn",
        "instructions", "guide", "tutorial", "help", "clear", "straightforward", "configuration",
        "settings", "customize", "personalize", "navigate", "interaction", "command structure"
    ],
    "Privacy & Security": [
        "privacy", "data", "listening", "security", "surveillance", "record", "spy", "collect",
        "tracking", "concern", "worry", "safe", "unsafe", "breach", "leak", "consent", "permission",
        "trust", "trustworthy", "creepy", "scary", "suspicious", "watching", "monitoring", "gdpr",
        "policy", "terms", "agreement", "encryption", "protected", "vulnerable", "hack", "risk",
        "danger", "paranoid", "microphone", "camera", "recording", "personal", "information", "location"
    ],
    "Updates & Evolution": [
        "update", "version", "bug", "feature", "release", "patch", "upgrade", "improve",
        "improvement", "fix", "issue", "problem", "solved", "downgrade", "regression", "change",
        "changed", "new", "added", "removed", "missing", "development", "roadmap", "progress",
        "evolve", "evolution", "grow", "maturity", "mature", "immature", "beta", "alpha", "stable"
    ],
    "Support & Service": [
        "support", "help", "service", "issue", "resolution", "customer", "contact", "call",
        "phone", "email", "chat", "representative", "agent", "ticket", "case", "response",
        "warranty", "replacement", "refund", "return", "satisfaction", "dissatisfaction",
        "frustrated", "complaint", "feedback", "solve", "solution", "troubleshoot", "repair"
    ],
    "Social & Emotional Aspects": [
        "personality", "character", "funny", "humor", "joke", "laugh", "fun", "entertaining",
        "companion", "friend", "relationship", "emotion", "emotional", "human-like", "humanlike",
        "personal", "personable", "warm", "cold", "robotic", "mechanical", "natural", "unnatural",
        "conversation", "conversational", "chat", "talk", "dialogue", "interaction", "interactive",
        "respond", "response", "reply", "engaging", "engage", "connection", "connect", "relate"
    ],
    "Personalization & Intelligence": [
        "personalize", "customize", "preference", "learn", "adapt", "suggest", "recommendation",
        "profile", "account", "user", "individual", "specific", "tailored", "custom", "habit",
        "routine", "pattern", "predict", "predictive", "anticipate", "remember", "memory",
        "context", "contextual", "awareness", "recognize", "familiar", "personal", "special",
        "unique", "adjust", "adaptation", "history", "previous", "past", "experience"
    ]
}

Attempted THESIS_ROOT: /Users/viltetverijonaite/Desktop/MSC/THESIS but results folder not found. Trying current dir.
Using THESIS_ROOT: /Users/viltetverijonaite/Desktop/MSC/THESIS/thesis
Input directory: /Users/viltetverijonaite/Desktop/MSC/THESIS/thesis/results
Output directory for ABSA results: /Users/viltetverijonaite/Desktop/MSC/THESIS/thesis/results/absa_full_results
Using CPU. Processing will be slower.
