# Fine-tuning Phi-3 Mini on Video Summaries

In [None]:
# Install necessary libraries if not already present
# Using -q for quieter output
# !pip install -q transformers torch accelerate einops pandas
# Ensure you have a version of transformers that supports Phi-3 (e.g., >= 4.39 or newer)

In [None]:
import pandas as pd
import json
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from accelerate import Accelerator
# import einops # Often a dependency for newer models, import if needed by Phi-3 or training setup

## Device Configuration for Apple Silicon (MPS)

In [None]:
# No device is passed in: Accelerator() picks one itself, and the result is
# printed so the selected backend can be checked before any heavy work starts.
accelerator = Accelerator()
# Kept as a notebook-level variable because later cells read it
# (model loading and inference).
device_phi3 = accelerator.device
print(f"Selected device: {device_phi3}")

## Load Processed Video Summaries

In [None]:
def read_jsonl_records(path):
    """Return the JSON objects stored one per line in the file at *path*.

    Blank or whitespace-only lines (for example a trailing newline at the end
    of the file) are skipped instead of being handed to ``json.loads``.

    Raises:
        FileNotFoundError: if *path* does not exist.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]


def keep_nonempty_summaries(df):
    """Return the rows of *df* whose 'summary' value is a non-blank string.

    Missing values, non-string values and whitespace-only strings are all
    dropped, since none of them can be tokenized as text later on.
    """
    has_text = df['summary'].map(
        lambda value: isinstance(value, str) and value.strip() != ''
    ).astype(bool)  # explicit bool dtype so an empty mask is still a row mask
    return df[has_text]


input_summaries_file = '../data/video_summaries.jsonl'
# Columns of the empty fallback frame used whenever loading fails.
summary_columns_phi3 = ['video_id', 'title', 'original_transcript_length', 'summary']
loaded_summaries_list = []
try:
    loaded_summaries_list = read_jsonl_records(input_summaries_file)
    df_phi3_summaries = pd.DataFrame(loaded_summaries_list)
    if df_phi3_summaries.empty:
        print(f"Loaded summaries file {input_summaries_file}, but it resulted in an empty DataFrame.")
        df_phi3_summaries = pd.DataFrame(columns=summary_columns_phi3)
    elif 'summary' not in df_phi3_summaries.columns:
        # Without this check the filtering below would raise an uncaught KeyError.
        print(f"Error: records in {input_summaries_file} have no 'summary' field.")
        df_phi3_summaries = pd.DataFrame(columns=summary_columns_phi3)
    else:
        print(f"Successfully loaded {len(df_phi3_summaries)} summaries from {input_summaries_file}.")
        df_phi3_summaries = keep_nonempty_summaries(df_phi3_summaries)
        print(f"Number of summaries after filtering empty/NaN: {len(df_phi3_summaries)}")
except FileNotFoundError:
    print(f"Error: Summaries file {input_summaries_file} not found. Please run notebook 07 (summarization part) first.")
    df_phi3_summaries = pd.DataFrame(columns=summary_columns_phi3)
except json.JSONDecodeError as e:
    print(f"Error decoding JSON from {input_summaries_file}: {e}")
    df_phi3_summaries = pd.DataFrame(columns=summary_columns_phi3)

# Every path above binds df_phi3_summaries, so only emptiness needs checking.
if df_phi3_summaries.empty:
    print("\nWarning: No summaries loaded. This notebook relies on summary data for fine-tuning Phi-3.")

## Load Phi-3 Mini Tokenizer and Prepare Data

In [None]:
# Model identifier passed to from_pretrained here and again by the model-loading cells.
phi3_model_id = "microsoft/Phi-3-mini-4k-instruct"
# Stays None when loading fails; the tokenization cell checks for None before using it.
tokenizer_phi3 = None
try:
    tokenizer_phi3 = AutoTokenizer.from_pretrained(phi3_model_id, trust_remote_code=True)
    print(f"Tokenizer for '{phi3_model_id}' loaded successfully.")
    # The summaries are tokenized with padding=True later on, which needs a pad
    # token; reuse the EOS token when the tokenizer does not define one.
    if tokenizer_phi3.pad_token is None:
        tokenizer_phi3.pad_token = tokenizer_phi3.eos_token 
        print(f"Set tokenizer_phi3.pad_token to: '{tokenizer_phi3.pad_token}'")
except Exception as e:
    # Any failure is reported rather than raised, so the rest of the notebook can still run.
    print(f"Error loading Phi-3 tokenizer: {e}")

Note: The 'microsoft/Phi-3-mini-4k-instruct' model is an instruction-tuned model. For fine-tuning on plain text (like our summaries), we are treating each summary as a sequence for the model to learn to complete or generate in a similar style. For more conversational tasks, one would typically format the input using the model's specific chat template.

In [None]:
# Both lists are filled in on success and stay empty when tokenization is
# skipped or fails; the dataset cell checks them before building anything.
encodings_phi3_input_ids = []
encodings_phi3_attention_mask = []

if (
    'df_phi3_summaries' not in globals()
    or df_phi3_summaries.empty
    or 'summary' not in df_phi3_summaries.columns
    or tokenizer_phi3 is None
):
    print("DataFrame of summaries for Phi-3 is empty, 'summary' column is missing, or tokenizer_phi3 not loaded.")
else:
    summary_list_phi3 = df_phi3_summaries['summary'].tolist()
    if not summary_list_phi3:
        print("Summary list for Phi-3 is empty. No summaries to tokenize.")
    else:
        try:
            # One batched call: sequences are cut at 512 tokens and padded to a
            # common length, with the matching attention masks returned.
            encodings_phi3 = tokenizer_phi3(
                summary_list_phi3,
                max_length=512,
                truncation=True,
                padding=True,
                return_attention_mask=True,
            )
            encodings_phi3_input_ids = encodings_phi3['input_ids']
            encodings_phi3_attention_mask = encodings_phi3['attention_mask']
            print(f"Tokenized {len(encodings_phi3_input_ids)} summaries for Phi-3.")
            if encodings_phi3_input_ids:
                first_tokens = encodings_phi3_input_ids[0][:10]
                print(f"Example tokenized summary for Phi-3 (first 10 tokens): {first_tokens}")
        except Exception as err:
            print(f"Error during Phi-3 summary tokenization: {err}")

## Create PyTorch Dataset and DataLoader for Phi-3

In [None]:
from torch.utils.data import Dataset, DataLoader 

# Default to None so later cells can tell whether the dataset and DataLoader
# were actually built (the training cell checks the DataLoader for None).
dataset_phi3_summaries = None
dataloader_phi3_summaries = None

class Phi3SummaryDataset(Dataset):
    """Map-style dataset over pre-tokenized summaries for causal-LM fine-tuning.

    Args:
        input_ids_list: one list of token ids per summary.
        attention_masks_list: one list of 0/1 mask values per summary, aligned
            position by position with ``input_ids_list``.

    Each item is a dict of ``torch.long`` tensors with the keys ``input_ids``,
    ``attention_mask`` and ``labels``. ``labels`` is a copy of ``input_ids`` in
    which every position whose attention mask is 0 (padding) is replaced by
    ``IGNORE_INDEX``, so padding does not contribute to the training loss.
    """

    # Default ignore_index of torch.nn.CrossEntropyLoss; targets with this value are skipped.
    IGNORE_INDEX = -100

    def __init__(self, input_ids_list, attention_masks_list):
        self.input_ids_list = input_ids_list
        self.attention_masks_list = attention_masks_list

    def __len__(self):
        """Return the number of tokenized summaries."""
        return len(self.input_ids_list)

    def __getitem__(self, idx):
        """Return the tensors for sample *idx*.

        Raises:
            ValueError: if the token ids and attention mask differ in length.
        """
        input_ids = self.input_ids_list[idx]
        attention_mask = self.attention_masks_list[idx]
        if not isinstance(input_ids, list) or not isinstance(attention_mask, list):
            # Kept from the original behaviour: malformed entries yield empty tensors.
            print(f"Warning: Data at index {idx} is not in list format. Skipping.")
            return {'input_ids': torch.tensor([], dtype=torch.long),
                    'attention_mask': torch.tensor([], dtype=torch.long),
                    'labels': torch.tensor([], dtype=torch.long)}
        if len(input_ids) != len(attention_mask):
            raise ValueError(
                f"input_ids and attention_mask differ in length at index {idx}: "
                f"{len(input_ids)} vs {len(attention_mask)}"
            )
        ids_tensor = torch.tensor(input_ids, dtype=torch.long)
        mask_tensor = torch.tensor(attention_mask, dtype=torch.long)
        # Train only on real tokens: padded positions must not be predicted.
        labels = ids_tensor.clone()
        labels[mask_tensor == 0] = self.IGNORE_INDEX
        return {'input_ids': ids_tensor,
                'attention_mask': mask_tensor,
                'labels': labels}

# Build the dataset and its DataLoader only when tokenization produced
# matching, non-empty lists of token ids and attention masks.
if not (encodings_phi3_input_ids and encodings_phi3_attention_mask):
    print("No tokenized summaries or attention masks available for Phi-3. Cannot create Dataset/DataLoader.")
elif len(encodings_phi3_input_ids) != len(encodings_phi3_attention_mask):
    print("Mismatch between length of input_ids and attention_mask for Phi-3. Cannot create dataset.")
else:
    dataset_phi3_summaries = Phi3SummaryDataset(encodings_phi3_input_ids, encodings_phi3_attention_mask)
    print(f"Created Phi-3 summary dataset with {len(dataset_phi3_summaries)} samples.")
    if len(dataset_phi3_summaries) == 0:
        print("Phi-3 summary dataset is empty. DataLoader not created.")
    else:
        # One sample per step, visited in a shuffled order.
        dataloader_phi3_summaries = DataLoader(dataset_phi3_summaries, batch_size=1, shuffle=True)
        print(f"Created Phi-3 summary DataLoader with batch_size=1.")

## Load Pre-trained Phi-3 Mini Model

In [None]:
# Remains None unless the checkpoint loads; later cells test for None.
model_phi3 = None
if 'phi3_model_id' not in globals() or not phi3_model_id or 'device_phi3' not in globals():
    print("phi3_model_id or device_phi3 not defined. Cannot load model.")
else:
    try:
        # The model is not moved to a device here; the message below says the
        # Accelerator will do that.
        model_phi3 = AutoModelForCausalLM.from_pretrained(
            phi3_model_id, trust_remote_code=True, torch_dtype="auto"
        )
        print(f"Pre-trained model '{phi3_model_id}' loaded successfully. It will be moved to {device_phi3} by Accelerator.")
    except Exception as err:
        print(f"Error loading pre-trained Phi-3 model '{phi3_model_id}': {err}")

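The following cell attempts full fine-tuning. This is memory-intensive for a ~3.8B-parameter model: besides the weights themselves, the gradients and AdamW's two per-parameter state buffers must also fit in memory, so it may fail on devices with limited VRAM/unified memory. If it fails, consider PEFT/LoRA (see the section below).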

## Attempting Full Fine-tuning of Phi-3 Mini

In [None]:
from torch.optim import AdamW

# Set to True only after every epoch has finished; the save cell checks this flag.
phi3_full_finetune_successful = False

# Parenthesised condition instead of backslash continuations: a backslash
# followed by anything (even one trailing space) is a SyntaxError.
if (
    'model_phi3' in globals() and model_phi3 is not None
    and 'dataloader_phi3_summaries' in globals() and dataloader_phi3_summaries is not None
    and 'accelerator' in globals()
):
    print(f"Preparing for full fine-tuning of Phi-3 on {accelerator.device}...")
    optimizer_phi3 = AdamW(model_phi3.parameters(), lr=3e-5)

    # Let the Accelerator wrap and place the model, optimizer and dataloader.
    model_phi3, optimizer_phi3, dataloader_phi3_summaries = accelerator.prepare(
        model_phi3, optimizer_phi3, dataloader_phi3_summaries
    )

    num_epochs_phi3 = 1

    print("Starting full fine-tuning of Phi-3 Mini...")
    try:
        for epoch in range(num_epochs_phi3):
            model_phi3.train()
            total_loss_phi3 = 0.0
            batches_with_loss_phi3 = 0
            num_batches_phi3 = len(dataloader_phi3_summaries)
            print(f"Starting Phi-3 Fine-Tuning Epoch {epoch+1}/{num_epochs_phi3}")
            for batch_idx, batch in enumerate(dataloader_phi3_summaries):
                optimizer_phi3.zero_grad()

                outputs = model_phi3(
                    input_ids=batch['input_ids'],
                    attention_mask=batch['attention_mask'],
                    labels=batch['labels']
                )
                loss = outputs.loss

                if loss is None:
                    print(f"Warning: Loss is None for Phi-3 batch {batch_idx+1}. Skipping.")
                    continue

                accelerator.backward(loss)
                optimizer_phi3.step()
                # Read the scalar once; it is used for both the total and the log line.
                loss_value_phi3 = loss.item()
                total_loss_phi3 += loss_value_phi3
                batches_with_loss_phi3 += 1
                print(f"  Epoch {epoch+1}, Batch {batch_idx+1}/{num_batches_phi3}, Loss: {loss_value_phi3:.4f}")

            # Average over the batches that actually produced a loss, so skipped
            # batches do not drag the reported value towards zero.
            avg_loss_phi3 = total_loss_phi3 / batches_with_loss_phi3 if batches_with_loss_phi3 > 0 else 0
            accelerator.print(f"Epoch {epoch+1}/{num_epochs_phi3} - Average Training Loss: {avg_loss_phi3:.4f}")

        phi3_full_finetune_successful = True
        print("Full fine-tuning of Phi-3 Mini completed.")
    except Exception as e:
        # Deliberately broad: the failure is reported instead of raised so the
        # remaining cells can still run.
        print(f"Error during full fine-tuning of Phi-3: {e}")
        print("Full fine-tuning may have failed due to memory (OOM) or other issues. Consider PEFT/LoRA as an alternative.")
else:
    print("Required variables (model_phi3, dataloader_phi3_summaries, accelerator) not available. Skipping full fine-tuning.")

## Save Fully Fine-tuned Phi-3 Model

In [None]:
import os 
# Directory that receives the fine-tuned weights and the tokenizer files.
output_dir_phi3_full = './fine_tuned_phi3_mini_summaries_full'

# The conditions are written without backslash continuations: a backslash
# followed by anything (even one trailing space) is a SyntaxError.
if not globals().get('phi3_full_finetune_successful', False):
    print("Phi-3 full fine-tuning was not completed or successful. Model not saved.")
elif (
    'model_phi3' in globals() and model_phi3 is not None
    and 'tokenizer_phi3' in globals() and tokenizer_phi3 is not None
    and 'accelerator' in globals()
):
    try:
        os.makedirs(output_dir_phi3_full, exist_ok=True)
        # Save the underlying model rather than the Accelerator-prepared wrapper.
        unwrapped_model_phi3 = accelerator.unwrap_model(model_phi3)
        unwrapped_model_phi3.save_pretrained(output_dir_phi3_full)
        # Tokenizer goes into the same directory so the inference cell can load both from it.
        tokenizer_phi3.save_pretrained(output_dir_phi3_full)
        print(f"Fully fine-tuned Phi-3 model and tokenizer saved to {output_dir_phi3_full}")
    except Exception as e:
        print(f"Error saving fully fine-tuned Phi-3 model/tokenizer: {e}")
else:
    print("Phi-3 model, tokenizer, or accelerator not available. Skipping saving of fully fine-tuned model.")

## Alternative: PEFT/LoRA Fine-tuning (If Full Fine-tuning Fails)

In [None]:
# If full fine-tuning is too resource-intensive, PEFT methods like LoRA can be used.
# This involves freezing most of the base model and training only small adapter layers.
# Example outline (requires `peft` library: !pip install -q peft):
#
# from peft import get_peft_model, LoraConfig, TaskType
#
# # It's often good to reload the base model to ensure no prior full fine-tuning attempts affect it,
# # especially if using a different precision like float16 for LoRA.
# # Ensure 'phi3_model_id' and 'device_phi3' are defined.
# # model_phi3_for_lora = AutoModelForCausalLM.from_pretrained(
# #     phi3_model_id, 
# #     trust_remote_code=True, 
# #     torch_dtype=torch.float16 # Example: use float16 for LoRA if supported
# # ).to(device_phi3)
#
# lora_config = LoraConfig(
#     r=16,  # Rank of the update matrices.
#     lora_alpha=32,  # Alpha parameter for LoRA scaling.
#     # For Phi-3, target_modules might include: "qkv_proj", "o_proj", "gate_up_proj", "down_proj" 
#     # or specific names like "Wqkv", "out_proj", "fc1", "fc2" depending on the exact architecture variant.
#     # It's crucial to inspect model_phi3.named_modules() to identify correct target layers.
#     target_modules=["qkv_proj", "o_proj"], # Attention projections, matching the first set of Phi-3 names above - still verify with named_modules()
#     lora_dropout=0.05,
#     bias="none",
#     task_type=TaskType.CAUSAL_LM
# )
#
# # model_phi3_lora = get_peft_model(model_phi3_for_lora, lora_config)
# # model_phi3_lora.print_trainable_parameters()
#
# # # Prepare for training with Accelerator if using LoRA model
# # optimizer_phi3_lora = AdamW(model_phi3_lora.parameters(), lr=1e-4) # May use different LR for LoRA
# # model_phi3_lora, optimizer_phi3_lora, dataloader_phi3_lora_prepared = accelerator.prepare(
# #     model_phi3_lora, optimizer_phi3_lora, dataloader_phi3_summaries # Reuse the same dataloader
# # )
#
# # # ... then, proceed with a similar training loop using model_phi3_lora and dataloader_phi3_lora_prepared ...
#
# # # Save LoRA adapters and tokenizer:
# # output_dir_phi3_lora = './fine_tuned_phi3_mini_summaries_lora'
# # if model_phi3_lora and tokenizer_phi3: # Check if LoRA model and tokenizer exist
# #     os.makedirs(output_dir_phi3_lora, exist_ok=True)
# #     unwrapped_lora_model = accelerator.unwrap_model(model_phi3_lora) # Unwrap if prepared with accelerator
# #     unwrapped_lora_model.save_pretrained(output_dir_phi3_lora)
# #     tokenizer_phi3.save_pretrained(output_dir_phi3_lora)
# #     print(f"LoRA adapters and tokenizer saved to {output_dir_phi3_lora}")
# # else:
# #     print("LoRA model or tokenizer not available. Skipping saving.")

# Single message built by literal concatenation: passing several arguments to
# print() joins them with a space, which left a stray space at the start of
# the second and third lines.
print("PEFT/LoRA section provides a conceptual outline.\n"
      "Actual implementation requires installing 'peft', identifying correct 'target_modules' for Phi-3,\n"
      "and ensuring the training loop is correctly set up for the PEFT model.")

## Load Fine-tuned Phi-3 Mini for Inference

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer # Ensure imports
import torch
import os

fine_tuned_phi3_full_dir = './fine_tuned_phi3_mini_summaries_full'
fine_tuned_phi3_lora_dir = './fine_tuned_phi3_mini_summaries_lora' # Placeholder for LoRA path
# Both stay None until a model/tokenizer pair has loaded successfully.
inference_model_phi3 = None
inference_tokenizer_phi3 = None

# Ensure device_phi3 is available (defined in an early cell)
if 'device_phi3' not in globals():
    # Imported here because this branch only runs when the earlier cells,
    # which import Accelerator, have not been executed in this session.
    from accelerate import Accelerator
    accelerator_inf = Accelerator() # Re-init accelerator if needed for device context
    device_phi3 = accelerator_inf.device
    print(f"Re-initialized device for inference: {device_phi3}")

# NOTE(review): this flag only exists in memory, so a model saved by a previous
# session is skipped and the base model is used instead - confirm this is intended.
phi3_full_finetune_was_successful = globals().get('phi3_full_finetune_successful', False)

if phi3_full_finetune_was_successful and os.path.exists(fine_tuned_phi3_full_dir):
    print(f"Loading fully fine-tuned Phi-3 model from {fine_tuned_phi3_full_dir}...")
    try:
        inference_model_phi3 = AutoModelForCausalLM.from_pretrained(fine_tuned_phi3_full_dir, trust_remote_code=True, torch_dtype="auto")
        inference_tokenizer_phi3 = AutoTokenizer.from_pretrained(fine_tuned_phi3_full_dir, trust_remote_code=True)
        print("Successfully loaded fully fine-tuned model and tokenizer.")
    except Exception as e:
        print(f"Error loading fully fine-tuned model: {e}. Will attempt to load base model.")
        inference_model_phi3 = None # Reset on error
        inference_tokenizer_phi3 = None
else:
    print("Fully fine-tuned Phi-3 model not found or training was not successful.")

# Placeholder for LoRA loading - this part would need to be fleshed out if LoRA training was actually implemented
# if inference_model_phi3 is None and os.path.exists(fine_tuned_phi3_lora_dir):
#     print(f"Attempting to load base model and LoRA adapters from {fine_tuned_phi3_lora_dir}...")
#     try:
#         from peft import PeftModel # Requires peft library
#         if 'phi3_model_id' not in globals(): phi3_model_id = "microsoft/Phi-3-mini-4k-instruct"
#         base_model_for_lora_inf = AutoModelForCausalLM.from_pretrained(phi3_model_id, trust_remote_code=True, torch_dtype="auto")
#         inference_model_phi3 = PeftModel.from_pretrained(base_model_for_lora_inf, fine_tuned_phi3_lora_dir)
#         inference_tokenizer_phi3 = AutoTokenizer.from_pretrained(fine_tuned_phi3_lora_dir, trust_remote_code=True)
#         print("Successfully loaded base model with LoRA adapters.")
#     except Exception as e:
#         print(f"Error loading LoRA model: {e}. Will attempt to load base model.")
#         inference_model_phi3 = None
#         inference_tokenizer_phi3 = None

if inference_model_phi3 is None: # Fallback to base model if no fine-tuned version loaded
    print("Loading base Phi-3 model for inference as no fine-tuned version was loaded.")
    try:
        if 'phi3_model_id' not in globals():
            phi3_model_id = "microsoft/Phi-3-mini-4k-instruct"
        inference_model_phi3 = AutoModelForCausalLM.from_pretrained(phi3_model_id, trust_remote_code=True, torch_dtype="auto")
        inference_tokenizer_phi3 = AutoTokenizer.from_pretrained(phi3_model_id, trust_remote_code=True)
        # Ensure pad token is set for base model tokenizer as well
        if inference_tokenizer_phi3.pad_token is None:
            inference_tokenizer_phi3.pad_token = inference_tokenizer_phi3.eos_token
            print(f"Set pad_token for base Phi-3 tokenizer to: '{inference_tokenizer_phi3.pad_token}'")
        print("Base Phi-3 model and tokenizer loaded.")
    except Exception as e:
        print(f"Error loading base Phi-3 model: {e}")

# Explicit None checks: "was it loaded" should not depend on how the model or
# tokenizer objects evaluate in a boolean context.
if inference_model_phi3 is not None and inference_tokenizer_phi3 is not None:
    inference_model_phi3 = inference_model_phi3.to(device_phi3)
    inference_model_phi3.eval()
    print(f"Phi-3 inference model is on device: {inference_model_phi3.device} and in eval mode.")
else:
    print("Could not load any model for Phi-3 inference.")

## Text Generation with Fine-tuned Phi-3 Mini

Generation quality and style will depend heavily on the success and extent of fine-tuning. If using an instruct model like Phi-3-mini-instruct, applying the correct chat template during inference (and possibly during fine-tuning) can be crucial for optimal results, especially if the fine-tuning data was not formatted that way.

In [None]:
def generate_text_phi3(prompt_text, model, tokenizer, device, max_new_tokens=100):
    """Sample a continuation of *prompt_text* from a causal language model.

    Args:
        prompt_text: plain-text prompt; it is encoded as-is, without a chat template.
        model: model exposing ``generate``; ``None`` means "not loaded".
        tokenizer: tokenizer matching *model*; ``None`` means "not loaded".
        device: device the encoded prompt is moved to before generation.
        max_new_tokens: upper bound on the number of generated tokens.

    Returns:
        A one-element list holding the generated text (prompt excluded), or an
        empty list when the model/tokenizer is missing or generation fails.
        Errors are printed, never raised.
    """
    if model is None or tokenizer is None:
        print("Inference model or tokenizer for Phi-3 not available.")
        return []
    try:
        # For Phi-3 instruct, the recommended way is using the chat template if available and appropriate.
        # However, since we fine-tuned on plain summaries, direct encoding might be what we test first.
        # If fine-tuning was on formatted prompts, this part needs to match that format.
        # messages = [{ "role": "user", "content": prompt_text }]
        # formatted_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        # input_ids = tokenizer.encode(formatted_prompt, return_tensors="pt", truncation=True).to(device)

        # Using direct encoding as per our fine-tuning data format (plain summaries).
        # Calling the tokenizer (instead of .encode) also yields the attention mask,
        # which is passed to generate() explicitly rather than left to be inferred.
        encoded = tokenizer(
            prompt_text,
            return_tensors="pt",
            truncation=True,
            return_attention_mask=True,
        ).to(device)
        input_ids = encoded["input_ids"]

        # Fall back to EOS for padding when the tokenizer defines no pad token.
        pad_token_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id

        output_sequences = model.generate(
            input_ids,
            attention_mask=encoded["attention_mask"],
            max_new_tokens=max_new_tokens,
            # num_beams=5, # Optional: Beam search
            # early_stopping is a beam-search option and is omitted while sampling.
            pad_token_id=pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,  # Enable sampling for temperature/top_p
        )

        # Decode only the newly generated tokens, not the prompt
        prompt_length = input_ids.shape[-1]
        generated_text = tokenizer.decode(output_sequences[0][prompt_length:], skip_special_tokens=True)
        return [generated_text] # Return as a list to match other generation functions
    except Exception as e:
        print(f"Error during Phi-3 text generation: {e}")
        return []

In [None]:
# Run the demo only when the inference model, tokenizer and device all exist.
# The condition is parenthesised rather than split with backslashes: a
# backslash followed by anything (even one trailing space) is a SyntaxError.
if (
    'inference_model_phi3' in globals() and inference_model_phi3 is not None
    and 'inference_tokenizer_phi3' in globals() and inference_tokenizer_phi3 is not None
    and 'device_phi3' in globals()
):
    # Sentence openers for the model to continue.
    prompts_for_phi3 = [
        "This video summary is about",
        "Key takeaways include",
        "This video explains how to",
        "An interesting point made was about",
        "To learn more about this, you should",
    ]

    print("\n--- Phi-3 Mini (Summary-Tuned) Text Generation Demo ---")
    for p in prompts_for_phi3:
        generated_texts_phi3 = generate_text_phi3(
            p,
            inference_model_phi3,
            inference_tokenizer_phi3,
            device_phi3,
            max_new_tokens=75 # Keep generated sequence length reasonable for summary-like output
        )
        print(f"\nPrompt: {p}...")
        if generated_texts_phi3:
            for i, g in enumerate(generated_texts_phi3):
                print(f"Generated {i+1}: {g}")
        else:
            # generate_text_phi3 returns an empty list on any failure.
            print("  (No text generated or error occurred)")
        print("-" * 30)
else:
    print("Phi-3 inference model, tokenizer, or device not loaded/defined. Cannot demonstrate generation.")