In [1]:
import os
from pathlib import Path
import ipywidgets as widgets
from IPython.display import display

# Resolve project paths relative to the directory the kernel runs in; uploaded
# files are written to the `data` folder next to the notebook directory.
NOTEBOOK_DIR = os.path.abspath(os.getcwd())
BASE_DIR = os.path.abspath(os.path.join(NOTEBOOK_DIR, '..'))
DATA_DIR = os.path.join(BASE_DIR, 'data')
os.makedirs(DATA_DIR, exist_ok=True)

uploader = widgets.FileUpload(
    accept='.csv,.xlsx,.xls',
    multiple=False,
    description='Upload CSV/Excel'
)
# The upload label has its own name: the prompt cell rebinds the global
# `status`, and the handler below resolves its label at call time, so sharing
# the name would send "Uploaded: ..." messages to the wrong widget.
upload_status = widgets.Label('Please upload your file (CSV or Excel).')
status = upload_status  # alias kept for code that still expects `status`
display(uploader, upload_status)


def _first_upload(value):
    """Return the first file record of a FileUpload value (mapping or sequence)."""
    records = list(value.values()) if isinstance(value, dict) else list(value)
    return records[0]


def _save_upload(change):
    """Persist the uploaded file into DATA_DIR and publish its path as INPUT_PATH.

    Args:
        change: traitlets change notification; unused, the current value is
            read from ``uploader`` directly.

    Side effects:
        Writes the file content to ``DATA_DIR``, updates the upload label and
        sets the global ``INPUT_PATH``. Does nothing when no file is present.
    """
    value = uploader.value
    if not value:
        return
    fileinfo = _first_upload(value)
    raw_name = fileinfo.get('metadata', {}).get('name') or fileinfo.get('name')
    # The name comes from the client: keep only its last path component so the
    # file can never be written outside DATA_DIR.
    filename = os.path.basename(str(raw_name).replace('\\', '/')) if raw_name else ''
    if filename in ('', '.', '..'):
        upload_status.value = 'Upload failed: the file has no usable name.'
        return
    out_path = os.path.join(DATA_DIR, filename)
    with open(out_path, 'wb') as f:
        f.write(fileinfo['content'])
    upload_status.value = f'Uploaded: {out_path}'
    globals()['INPUT_PATH'] = out_path

uploader.observe(_save_upload, names='value')


FileUpload(value=(), accept='.csv,.xlsx,.xls', description='Upload CSV/Excel')

Label(value='Please upload your file (CSV or Excel).')

In [4]:
import os
import yaml
import ipywidgets as widgets
from IPython.display import display

# The default prompt file is looked up in the directory the kernel runs in.
NOTEBOOK_DIR = os.path.abspath(os.getcwd())
PROMPT_FILE = os.path.join(NOTEBOOK_DIR, 'sentiment_prompt.yaml')

# Switch between the prompt stored in the YAML file and free-form text.
prompt_mode = widgets.ToggleButtons(
    description='Prompt:',
    options=['Use sentiment_prompt.yaml', 'Use custom prompt'],
)
# Text area holding the user's own prompt.
custom_prompt = widgets.Textarea(
    placeholder='Paste your prompt here...',
    description='Custom',
    layout=widgets.Layout(height='200px', width='100%'),
)
# Label reporting which prompt source is currently active.
status = widgets.Label(value='Prompt source: sentiment_prompt.yaml')

display(prompt_mode, status, custom_prompt)

def _load_prompt_from_yaml(path):
    """Read the prompt stored under the ``sentiment_prompt`` key of a YAML file.

    Args:
        path: location of the YAML file, opened as UTF-8 text.

    Returns:
        The prompt as a string. An empty string is returned when the file is
        empty, its top level is not a mapping, or the key is missing or null,
        so callers can always treat the result as text.

    Raises:
        OSError: if the file cannot be opened.
        Any parsing error raised by ``yaml.safe_load`` is propagated unchanged.
    """
    with open(path, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)
    # A list or scalar at the top level has no `.get`; treat it as "no prompt".
    if not isinstance(data, dict):
        return ''
    prompt = data.get('sentiment_prompt')
    if prompt is None:
        return ''
    # Non-string values (e.g. a number) are coerced so `.strip()` works later.
    return prompt if isinstance(prompt, str) else str(prompt)

def _update_prompt(change=None):
    """Publish the currently selected prompt as the global ``PROMPT_TEXT``.

    Args:
        change: optional traitlets change notification; ignored, the widgets
            are read directly so the function can also be called by hand.

    Side effects:
        Updates the ``status`` label with the active prompt source and stores
        the prompt text in ``globals()['PROMPT_TEXT']``.
    """
    if prompt_mode.value != 'Use sentiment_prompt.yaml':
        chosen_text = custom_prompt.value
        source_note = 'Prompt source: custom text'
    else:
        # Load first: if reading the file fails, the label is left untouched.
        chosen_text = _load_prompt_from_yaml(PROMPT_FILE)
        source_note = f'Prompt source: {PROMPT_FILE}'
    status.value = source_note
    globals()['PROMPT_TEXT'] = chosen_text

# Refresh the prompt when the mode is switched or the custom text is edited,
# and once right away so PROMPT_TEXT exists after running this cell.
prompt_mode.observe(_update_prompt, names='value')
custom_prompt.observe(_update_prompt, names='value')
_update_prompt()

def _toggle_custom_visibility(change=None):
    """Show the custom prompt text area only while custom mode is selected.

    Args:
        change: optional traitlets change notification; ignored.
    """
    wants_custom = prompt_mode.value == 'Use custom prompt'
    # An empty string restores the default display; 'none' hides the widget.
    custom_prompt.layout.display = '' if wants_custom else 'none'

# Keep visibility in sync with the toggle and apply the initial state.
prompt_mode.observe(_toggle_custom_visibility, names='value')
_toggle_custom_visibility()


ToggleButtons(description='Prompt:', options=('Use sentiment_prompt.yaml', 'Use custom prompt'), value='Use se…

Label(value='Prompt source: sentiment_prompt.yaml')

Textarea(value='', description='Custom', layout=Layout(height='200px', width='100%'), placeholder='Paste your …

In [5]:
import os
import sys

# Make the project root (parent of the notebook directory) importable so that
# the `src` package can be resolved below.
NOTEBOOK_DIR = os.path.abspath(os.getcwd())
BASE_DIR = os.path.abspath(os.path.join(NOTEBOOK_DIR, '..'))
# Prepend only once, so re-running the cell does not grow sys.path.
if not any(entry == BASE_DIR for entry in sys.path):
    sys.path.insert(0, BASE_DIR)

import json
import asyncio
import pandas as pd
from jinja2 import Template

from src.labeling_utils import AsyncProcessor

# Required: INPUT_PATH (from upload cell) and PROMPT_TEXT (from prompt cell)
if 'INPUT_PATH' not in globals():
    raise ValueError('INPUT_PATH is not set. Please upload a CSV/Excel file first.')
# Fetch the prompt defensively: it may be missing, None or a non-string value,
# and all of those must yield the ValueError below rather than an
# AttributeError from calling `.strip()` on the wrong type.
_prompt_candidate = globals().get('PROMPT_TEXT')
if not isinstance(_prompt_candidate, str) or not _prompt_candidate.strip():
    raise ValueError('PROMPT_TEXT is empty. Please choose a prompt.')

# The API key is read from the environment, never from the notebook itself.
api_key = os.environ.get('GOOGLE_API_KEY')
if not api_key:
    raise ValueError('GOOGLE_API_KEY is not set in the environment.')

def load_input(path):
    """Load a tabular input file into a DataFrame, choosing the reader by extension.

    Args:
        path: ``str`` or ``os.PathLike`` pointing at the file. The extension is
            matched case-insensitively.

    Returns:
        The result of ``pd.read_csv`` for ``.csv`` files, or of
        ``pd.read_excel`` for ``.xlsx`` / ``.xls`` files.

    Raises:
        ValueError: if the file name has none of the supported extensions.
    """
    # os.fspath lets callers pass pathlib.Path objects as well as strings.
    location = os.fspath(path)
    lowered = location.lower()
    if lowered.endswith('.csv'):
        return pd.read_csv(location)
    if lowered.endswith(('.xlsx', '.xls')):
        return pd.read_excel(location)
    raise ValueError(f'Unsupported input file: {path}')

df = load_input(INPUT_PATH)

# Minimal normalization: guarantee the `text`, `id`, `Brands_Mentioned` and
# `Features_Mentioned` columns exist before prompts are rendered.
if 'text' not in df.columns:
    if 'Message Content' not in df.columns:
        raise ValueError('Missing text column (text or Message Content).')
    # Accept `Message Content` as the text column under its canonical name.
    df = df.rename(columns={'Message Content': 'text'})

# Fall back to a positional id when the input has none.
if 'id' not in df.columns:
    df.insert(0, 'id', range(len(df)))

# Optional columns default to empty strings.
for optional_col in ('Brands_Mentioned', 'Features_Mentioned'):
    if optional_col not in df.columns:
        df[optional_col] = ''

def _cell_text(value):
    """Return a cell as a stripped string, mapping missing values to ''.

    Missing cells arrive as None/NaN/NA; ``str()`` would turn them into the
    literal text 'nan', which must not be sent to the model as real content.
    """
    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        return ''
    return str(value).strip()


def _split_cell(value):
    """Split a comma-separated cell into a list of non-empty, stripped items."""
    return [part.strip() for part in _cell_text(value).split(',') if part.strip()]


template = Template(PROMPT_TEXT)

# One request per row that has text; rows with empty or missing text are skipped.
jsonl_data = []
for _, row in df.iterrows():
    text_content = _cell_text(row.get('text'))
    if not text_content:
        continue
    row_key = str(row.get('id'))

    # The lists are passed to the template as their Python string representation.
    rendered_prompt = template.render(
        id=row_key,
        input_text=text_content,
        features_mentioned=str(_split_cell(row.get('Features_Mentioned'))),
        brands_mentioned=str(_split_cell(row.get('Brands_Mentioned')))
    )

    request_body = {
        'contents': [
            {
                'role': 'user',
                'parts': [{'text': rendered_prompt}]
            }
        ],
        'generation_config': {
            'response_mime_type': 'application/json',
            'temperature': 0.1
        }
    }

    # `key` carries the row id alongside its request.
    jsonl_data.append({
        'key': row_key,
        'request': request_body
    })

print(f'Prepared {len(jsonl_data)} rows for inference.')

async def run_batch():
    """Send every prepared request through an ``AsyncProcessor`` and return its results.

    The processor is created with a limit of 120 requests per minute and at
    most 2 concurrent requests, authenticated with the module-level ``api_key``.
    """
    batch_runner = AsyncProcessor(api_key=api_key, max_concurrent=2, rpm_limit=120)
    batch_results = await batch_runner.process_batch(jsonl_data)
    return batch_results

def _run_blocking(coro_factory):
    """Run the coroutine created by ``coro_factory`` to completion and return its result.

    ``asyncio.run`` raises ``RuntimeError`` when an event loop is already
    running in the current thread, as is typically the case inside a Jupyter
    kernel. In that situation the coroutine is run with ``asyncio.run`` on a
    short-lived worker thread, which gets an event loop of its own.

    Args:
        coro_factory: zero-argument callable returning the coroutine to run.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running here (plain script): asyncio.run works directly.
        return asyncio.run(coro_factory())
    from concurrent.futures import ThreadPoolExecutor
    with ThreadPoolExecutor(max_workers=1) as pool:
        # Create the coroutine inside the worker so it is bound to that loop.
        return pool.submit(lambda: asyncio.run(coro_factory())).result()


results = _run_blocking(run_batch)

# Predictions are stored next to the input file, one JSON document per line.
OUTPUT_JSONL_PATH = os.path.join(os.path.dirname(INPUT_PATH), 'output_predictions.jsonl')
with open(OUTPUT_JSONL_PATH, 'w', encoding='utf-8') as f:
    for res in results:
        f.write(json.dumps(res) + '\n')

print(f'Saved: {OUTPUT_JSONL_PATH}')


ModuleNotFoundError: No module named 'aiolimiter'