In [1]:
import sys
import os

sys.path.append(os.path.abspath('../'))

In [2]:
import numpy as np
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
import torch
torch.cuda.empty_cache()
device = 'cuda' if torch.cuda.is_available() else 'cpu'

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import Trainer, TrainingArguments

from src.utils import logger, DatasetTypes
from src.data import get_datasets, get_dataloaders, one_hot_encoded_to_genre_list
from src.metrics import GenrePredictorInterface, evaluate_model
from src.model import get_pretrained
import json
import re
from peft import get_peft_model, LoraConfig, TaskType

device

  from .autonotebook import tqdm as notebook_tqdm


'cuda'

In [3]:
model_name = "Qwen/Qwen3-0.6B"
tokenizer, base_model = get_pretrained(model_name, device)

In [4]:
path_to_csv = '../data/all_genres_downsampled.csv'
data_dict = get_datasets(path_to_csv, tokenizer, dataset_type=DatasetTypes.small)

train_dataset, val_dataset, test_dataset = data_dict['train_dataset'], data_dict['val_dataset'], data_dict['test_dataset']
idx2genre, genre2idx = data_dict['idx2genre'], data_dict['genre2idx']
genres = [key for key, _ in genre2idx.items()]

batch_size = 4
traid_loader, val_loader, test_loader = get_dataloaders(train_dataset, val_dataset, test_dataset, batch_size)

In [5]:
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],  
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(base_model, lora_config).to(device)
model.print_trainable_parameters()
model.gradient_checkpointing_enable()

trainable params: 1,146,880 || all params: 597,196,800 || trainable%: 0.1920


In [7]:
training_args = TrainingArguments(
    output_dir="./qwen_lora_genre",
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    logging_steps=50,
    save_strategy="steps",
    save_steps=200,
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,
    save_total_limit=2,
    gradient_accumulation_steps=4,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
)

trainer.train()


  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


ValueError: Expected input batch_size (2048) to match target batch_size (352).

In [None]:
model.save_pretrained("./qwen_lora_adapter")
tokenizer.save_pretrained("./qwen_lora_adapter")

In [8]:
class LoraClassifier(GenrePredictorInterface):
    def __init__(
        self, 
        model, 
        tokenizer, 
        genres, 
        prompt_template, 
        device="cuda", 
        max_lyrics_length=300, 
        batch_size=2
    ):
        self.model = model
        self.tokenizer = tokenizer
        self.genres = genres
        self.prompt_template = prompt_template
        self.device = device
        self.max_lyrics_length = max_lyrics_length
        self.batch_size = batch_size

    def _make_prompts(self, lyrics: str) -> list[str]:
        truncated = lyrics[:self.max_lyrics_length].replace('\n', ' ').replace('"', "'")
        return [self.prompt_template % (truncated, genre) for genre in self.genres]

    def _parse_response(self, response: str) -> int:
        try:
            match = re.search(r'\{[^}]*"predict"\s*:\s*(0|1)[^}]*\}', response)
            if match:
                data = json.loads(match.group(0))
                return int(data["predict"])
        except Exception as e:
            print(f"Parse error: {e}")
        return 0

    def predict(self, batch: dict, enable_thinking=False, debug=False):
        lyrics_list = [row['lyrics'] for row in batch['features']]
        prompts, index_map = [], []

        for i, lyrics in enumerate(lyrics_list):
            for genre in self.genres:
                prompts.append(self.prompt_template % (lyrics[:self.max_lyrics_length], genre))
                index_map.append(i)

        instruct_texts = [
            self.tokenizer.apply_chat_template(
                [{"role": "user", "content": prompt}],
                tokenize=False,
                add_generation_prompt=True,
                enable_thinking=enable_thinking,
                do_sample=False
            )
            for prompt in prompts
        ]

        full_outputs = []
        raw_predictions = []
        self.model.eval()

        for i in range(0, len(instruct_texts), self.batch_size):
            batch_prompts = instruct_texts[i:i + self.batch_size]
            model_inputs = self.tokenizer(batch_prompts, return_tensors="pt", padding=True, truncation=True).to(self.device)

            with torch.no_grad():
                generated_ids = self.model.generate(
                    **model_inputs,
                    max_new_tokens=1024,
                    pad_token_id=self.tokenizer.eos_token_id
                )

            for j in range(len(batch_prompts)):
                output_ids = generated_ids[j][len(model_inputs["input_ids"][j]):].tolist()
                try:
                    split_idx = len(output_ids) - output_ids[::-1].index(151668)
                except ValueError:
                    split_idx = 0

                thinking = self.tokenizer.decode(output_ids[:split_idx], skip_special_tokens=True).strip()
                answer = self.tokenizer.decode(output_ids[split_idx:], skip_special_tokens=True).strip()
                full_outputs.append(thinking + answer)
                raw_predictions.append(answer)

        num_genres = len(self.genres)
        predictions = np.zeros((len(lyrics_list), num_genres), dtype=np.int32)

        for i, pred_text in enumerate(raw_predictions):
            sample_idx = index_map[i]
            genre_idx = i % num_genres
            try:
                predictions[sample_idx, genre_idx] = self._parse_response(pred_text)
            except Exception as e:
                logger.warning(f"Parse failed: {pred_text} | Error: {e}")

        if debug:
            for i, pred in enumerate(predictions):
                print(f"Sample {i} predicted genres: {[g for g, v in zip(self.genres, pred) if v]}")

        return predictions, full_outputs, instruct_texts


In [None]:
# Load your LoRA model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen3-0.6B",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")

from peft import PeftModel
model = PeftModel.from_pretrained(model, "./qwen_lora_adapter").to(device)

# Instantiate classifier
classifier = LoraClassifier(
    model=model,
    tokenizer=tokenizer,
    genres=genres,
    prompt_template=prompt_v1,
    device=device,
    max_lyrics_length=300,
    batch_size=16
)

# Run prediction
batch = next(iter(val_loader))
batch['features'] = batch['features'][:1]
predictions, outputs, prompts = classifier.predict(batch, enable_thinking=True, debug=True)


2025-05-27 18:03:21,244 - accelerate.utils.modeling - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).


ValueError: Can't find 'adapter_config.json' at './qwen_lora_adapter'

: 