In [None]:
!nvidia-smi
!pip -q install --upgrade pip
!pip -q install torch==2.3.1+cu118 torchvision==0.18.1+cu118 torchaudio==2.3.1+cu118 --index-url https://download.pytorch.org/whl/cu118
!pip -q install "transformers>=4.44" "accelerate>=0.31" "datasets>=2.20" "evaluate" "scikit-learn" "peft>=0.11" "tensorboard" "pyarrow<18"


In [None]:
%%bash
cat > train_samo.py <<'PY'
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
SAMO — GoEmotions Multi‑Label Trainer (2×3090‑ready)


Launch examples (2 GPUs):
accelerate launch --num_processes=2 --mixed_precision=fp16 \
train_samo.py --train_json /path/train.jsonl --val_json /path/val.jsonl \
--output_dir ./samo_out --thresholds_json ./optimal_thresholds.json


or with torchrun:
torchrun --standalone --nproc_per_node=2 train_samo.py \
--train_json /path/train.jsonl --val_json /path/val.jsonl --use_accelerate false


"""
import os, json, math, random, argparse, warnings
from dataclasses import dataclass
from typing import List, Dict, Any


import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler


from transformers import (
AutoTokenizer,
AutoConfig,
AutoModelForSequenceClassification,
DataCollatorWithPadding,
TrainingArguments,
Trainer,
)


# ------------------------------
# Utilities
# ------------------------------


def set_seeds(seed: int = 42):
    """Seed every random number generator this script relies on.

    The same integer is handed, in order, to Python's ``random`` module,
    NumPy's global generator, PyTorch's default generator and the PyTorch
    generators of all CUDA devices.

    Args:
        seed: Seed value shared by all generators.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)




def enable_tf32():
    """Best-effort switch to TF32 math for float32 matmul and cuDNN ops.

    Sets the ``allow_tf32`` flags on ``torch.backends.cuda.matmul`` and
    ``torch.backends.cudnn``, then requests ``"high"`` float32 matmul
    precision. Each step is attempted independently so that a failure in one
    does not prevent the other.

    A failing step never aborts the run; instead of being silently swallowed
    it is reported as a ``RuntimeWarning`` so the user can tell that TF32 was
    not (fully) enabled.
    """
    try:
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True
    except Exception as exc:
        warnings.warn(
            f"Could not set the TF32 backend flags: {exc!r}", RuntimeWarning
        )
    try:
        torch.set_float32_matmul_precision("high")
    except Exception as exc:
        warnings.warn(
            f"Could not set float32 matmul precision to 'high': {exc!r}",
            RuntimeWarning,
        )




# ------------------------------
# Dataset loader (JSONL with {text, labels})
# ------------------------------
class JsonlMultiLabelDataset(Dataset):
    """In-memory dataset read from a JSONL file of ``{"text", "labels"}`` records.

    Every non-blank line of the file is parsed as one JSON object. Records
    missing either the ``"text"`` or the ``"labels"`` key (or carrying ``null``
    for one of them) are dropped; the rest are kept in ``self.examples`` as
    ``{"text": ..., "labels": ...}`` dictionaries with the values unchanged.

    Args:
        path: Path of the UTF-8 encoded JSONL file.
        tokenizer: Stored on the instance as ``self.tokenizer``; it is not
            called by the constructor.
        max_length: Stored on the instance as ``self.max_length``.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """

    def __init__(self, path: str, tokenizer, max_length: int):
        self.examples = []
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Blank lines (e.g. a trailing newline at end of file)
                    # are not records; json.loads("") would raise on them.
                    continue
                obj = json.loads(line)
                text = obj.get("text", None)
                labels = obj.get("labels", None)
                if text is None or labels is None:
                    continue
                self.examples.append({"text": text, "labels": labels})
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of records that were kept."""
        return len(self.examples)


if __name__ == "__main__":
    # Run the trainer only when the file is executed as a script (which is how
    # `accelerate launch` and `torchrun` invoke it), not when it is imported.
    main()
PY
python - <<'PY'
# Sanity check for the file written above: report its size and SHA-256 digest.
import hashlib, sys, os
p = "train_samo.py"
print("Wrote", p, "bytes:", os.path.getsize(p))
# Read inside a context manager so the file handle is closed explicitly.
with open(p, "rb") as fh:
    digest = hashlib.sha256(fh.read()).hexdigest()
print("sha256:", digest)
PY


In [None]:
%%bash
# Generate the default Accelerate config, then rewrite three settings in it so
# that it describes a 2-process, fp16, multi-GPU run.
accelerate config default
python - <<'PY'
import re
from pathlib import Path

cfg_path = Path("~/.cache/huggingface/accelerate/default_config.yaml").expanduser()
cfg_text = cfg_path.read_text()

# (key, value to replace, new value). A key is only rewritten when it currently
# holds the listed old value; any other value is left alone.
overrides = [
    ("distributed_type", "NO", "MULTI_GPU"),
    ("mixed_precision", "no", "fp16"),
    ("num_processes", "1", "2"),
]

for key, old, new in overrides:
    # Accept optional quotes around both key and value (YAML writers quote
    # 'no'/'NO'; JSON-style content quotes keys and strings) and require the
    # value to end right after `old`, so that e.g. `num_processes: 16` is not
    # turned into `num_processes: 26`.
    pattern = re.compile(
        rf"""(?<!\w)(["']?{key}["']?\s*:\s*)(["']?){re.escape(old)}\2(?![\w.])"""
    )
    cfg_text, hits = pattern.subn(
        lambda m: f"{m.group(1)}{m.group(2)}{new}{m.group(2)}", cfg_text
    )
    if hits == 0:
        # Make a no-op visible instead of silently claiming success.
        print(f"note: {key} left unchanged (no value {old!r} found to replace)")

cfg_path.write_text(cfg_text)
print("Accelerate config patched:\n", cfg_path.read_text())
PY

In [None]:
TRAIN_JSON = "/workspace/data/train.jsonl" # <-- set your train path
VAL_JSON = "/workspace/data/val.jsonl" # <-- set your val path
OUT_DIR = "./samo_out"
!mkdir -p "$OUT_DIR"

In [None]:
# Launch train_samo.py through `accelerate launch` with 2 processes and fp16
# mixed precision. The "$NAME" placeholders are filled from notebook variables,
# so TRAIN_JSON, VAL_JSON, OUT_DIR and THR_JSON must all be defined in the
# kernel before this cell runs. Every flag after `train_samo.py` is forwarded
# to the script itself.
!accelerate launch --num_processes=2 --mixed_precision=fp16 \
train_samo.py \
--train_json "$TRAIN_JSON" --val_json "$VAL_JSON" \
--output_dir "$OUT_DIR" \
--thresholds_json "$THR_JSON" \
--per_device_train_batch_size 16 --per_device_eval_batch_size 32 \
--gradient_accumulation_steps 2 \
--num_train_epochs 4 \
--learning_rate 2e-5 --lr_scheduler_type cosine --warmup_ratio 0.1 \
--weight_decay 0.01 --fp16 true --tf32 true --gradient_checkpointing true \
--ddp_backend nccl

In [None]:
import json, os

# Load the evaluation report found in the output directory.
report_path = os.path.join(OUT_DIR, "eval_report.json")
with open(report_path, "r") as report_file:
    rep = json.load(report_file)

print("F1_micro:", rep["f1_micro"], " F1_macro:", rep["f1_macro"])

# Rank the per-class entries by F1, ascending, then show both ends of the list.
pc = rep["per_class"]
sorted_items = list(pc.items())
sorted_items.sort(key=lambda entry: entry[1]["f1"])

for heading, rows in (
    ("\nWorst 5:", sorted_items[:5]),
    ("\nBest 5:", sorted_items[-5:]),
):
    print(heading)
    for class_name, class_stats in rows:
        print(class_name, class_stats)