In [1]:
import sys
import os
from pathlib import Path

project_root = Path(os.getcwd()).parent
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Training

In [3]:
from summarization import SummarizationTrainer

CUDA available: False


In [6]:
training_args = {
    # ===== TRAINING =====
    "num_train_epochs": 5,
    "learning_rate": 2e-5,
    "per_device_train_batch_size": 4,
    "per_device_eval_batch_size": 4,
    "weight_decay": 0.01,
    "gradient_accumulation_steps": 4,

    # ===== EVALUATION / LOGGING / SAVING =====
    "evaluation_strategy": "epoch",
    "logging_strategy": "epoch",
    "save_strategy": "epoch",

    # ===== SEQ2SEQ SPECIFIC =====
    "predict_with_generate": True,
    "generation_max_length": 256,
    "generation_num_beams": 4,

    # ===== PERFORMANCE =====
    "fp16": False,
    "gradient_checkpointing": True,

    # ===== MODEL SELECTION =====
    "load_best_model_at_end": True,
    "metric_for_best_model": "rougeL",
    "greater_is_better": True,

    # ===== MISC =====
    "report_to": "none",

    # REQUIRED
    "output_dir": "../outputs/summarization",
}


trainer = SummarizationTrainer(config="llama")
trainer.prepare_data_and_trainer(
    training_args_dict=training_args,
    train_size=100,
    val_size=10,
    test_size=10
)

Loading CausalLM base model from meta-llama/Llama-3.2-3B-Instruct...


Downloading shards: 100%|██████████| 2/2 [32:28<00:00, 974.11s/it] 
Loading checkpoint shards: 100%|██████████| 2/2 [00:13<00:00,  6.60s/it]


Applying new LoRA configuration (task_type=CAUSAL_LM)...
Using device: mps
Task type: causal
trainable params: 2,293,760 || all params: 3,215,043,584 || trainable%: 0.0713
Preparing dataset...
Loading processed dataset from disk...
Failed to load dataset from disk. Tokenizing dataset...
Train size: 100 | Val size: 10 | Test size: 10


Map: 100%|██████████| 100/100 [00:00<00:00, 186.92 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 168.30 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 189.15 examples/s]
Saving the dataset (1/1 shards): 100%|██████████| 100/100 [00:00<00:00, 51839.13 examples/s]
Saving the dataset (1/1 shards): 100%|██████████| 10/10 [00:00<00:00, 5881.79 examples/s]
Saving the dataset (1/1 shards): 100%|██████████| 10/10 [00:00<00:00, 5803.66 examples/s]
  return Trainer(


Merging custom training arguments...


In [None]:
trainer.train()

In [None]:
trainer.test()

# Inference

In [2]:
from summarization import SummarizationInferencer

  from .autonotebook import tqdm as notebook_tqdm
  Referenced from: <367D4265-B20F-34BD-94EB-4F3EE47C385B> /Users/komangwikananda/miniconda3/envs/sona-ai/lib/python3.12/site-packages/torchvision/image.so
  warn(


CUDA available: False


In [3]:
text = f"Nik: I don't think we should apply this change. \nRen: I agree. The helmet make the side ears looks too small. The face proportion looks weird. \nNik: But, I like the helmet tho. It looks kinda nice. \nRen: Should I redraw and adjust this? So we can keep it. \nNik: Yeah, I really think wearing a helmet will make this character looks awesome. It is just this helmet is not suitable for him. But, the design is cool tho. Maybe for the other character. \nRen: Yeah, sure. I will redraw it, then I will comeback to you later. \nNik: Ok, sure."
print(text)

Nik: I don't think we should apply this change. 
Ren: I agree. The helmet make the side ears looks too small. The face proportion looks weird. 
Nik: But, I like the helmet tho. It looks kinda nice. 
Ren: Should I redraw and adjust this? So we can keep it. 
Nik: Yeah, I really think wearing a helmet will make this character looks awesome. It is just this helmet is not suitable for him. But, the design is cool tho. Maybe for the other character. 
Ren: Yeah, sure. I will redraw it, then I will comeback to you later. 
Nik: Ok, sure.


In [4]:
custom_prompt = (
"You are an expert meeting analyst."
"Extract structured information from the conversation.\n"
"Output format:\n"
"Topics:\n"
"- item\n\n"
"Concerns:\n"
"- item\n\n"
"Solutions:\n"
"- item\n\n"
"Next Steps:\n"
"- item\n\n"
)
print(custom_prompt)

You are an expert meeting analyst.Extract structured information from the conversation.
Output format:
Topics:
- item

Concerns:
- item

Solutions:
- item

Next Steps:
- item




In [5]:
inferencer = SummarizationInferencer(
    config="llama",
    base_model=True,
    use_pretrained=True,
    device="cpu",
    max_new_tokens=256,
    num_beams=4,
)
summary = inferencer.generate(text, prompt=custom_prompt, max_length=256)
print(summary)

Loading CausalLM base model from meta-llama/Llama-3.2-3B-Instruct...


Loading checkpoint shards: 100%|██████████| 2/2 [00:09<00:00,  4.91s/it]
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Using device: cpu
Task type: causal
 Nik and Ren are discussing the design of a character. Nik wants to apply the change, but Ren is concerned that the helmet makes the character's side ears look too small and the face proportions look weird. Nik likes the helmet but thinks it doesn't suit the character. Ren agrees to redraw the helmet and come back to Nik later to show the revised design.



Structured Information:

Topics:
• Character design
• Helmet design
 

 Concerns:
• Helmet size and proportion
• Face proportions
 

 Solutions:
• Redraw the helmet
• Revise the character design

 Next Steps:
• Ren to redraw and revisit the design with Nik
• Nik to review and provide feedback to Ren
