# Install core dependencies

* This notebook can be run anywhere without cloning the repo.
* Comment out anything related to flash-attn and HF_TOKEN if you have trouble with them!

In [None]:
# Install core dependencies
!pip install transformers torch pandas

# For faster inference (important)
!pip install unsloth accelerate bitsandbytes

# Comment out anything related to flash-attn and HF_TOKEN if you have trouble with them!
# Flash Attention (highly recommended for speed)
!pip install flash-attn --no-build-isolation

# For dataset handling and YAML parsing
!pip install datasets pyyaml



In [19]:
import os
import time
import json
import pandas as pd
from datasets import Dataset, load_from_disk
from datetime import datetime
import warnings

# Suppress warnings
warnings.filterwarnings("ignore")

# Import Wandb for experiment tracking
import wandb

# Import Unsloth

import unsloth

# Import HuggingFace libraries

# Keep HuggingFace tokenizers from emitting their parallelism warning
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Hugging Face access token taken from the environment; None when HF_TOKEN is not set
HF_TOKEN = os.environ.get("HF_TOKEN")

# Implementation of PromptCreator class

In [20]:
class PromptCreator:
    """
    Creates and formats prompts for multiple choice questions
    Supports different prompt styles for training and inference

    Three prompt types are recognised: "basic" (answer letter only), "yaml"
    (YAML-structured reasoning) and "teacher". The teacher type produces exactly
    the same prompt text as "yaml"; it only changes what is_teacher_mode() reports.
    """

    # Prompt types
    BASIC = "basic"  # Simple answer-only format
    YAML_REASONING = "yaml"  # YAML formatted reasoning
    TEACHER_REASONED = "teacher"  # Same YAML format as YAML_REASONING but using teacher completions for training

    def __init__(self, prompt_type=BASIC):
        """
        Initialize prompt creator with the specified type

        Args:
            prompt_type: Type of prompts to generate - "basic", "yaml", or "teacher"
                         Note: "teacher" uses same prompt format as "yaml" but with teacher completions
        """
        # Remember what the caller asked for BEFORE normalising it; otherwise
        # is_teacher_mode() could never be True for an instance built with "teacher".
        self.original_type = prompt_type

        # For prompt formatting, teacher_reasoned is equivalent to yaml_reasoning
        # The difference only matters during training when using teacher completions
        if prompt_type == self.TEACHER_REASONED:
            prompt_type = self.YAML_REASONING

        self.prompt_type = prompt_type

    def _uses_yaml_format(self):
        """Return True when the active prompt type is rendered with the YAML template"""
        # set_prompt_type() stores "teacher" unchanged, so both values must map to YAML here
        return self.prompt_type in (self.YAML_REASONING, self.TEACHER_REASONED)

    def format_choices(self, choices):
        """Format choices as a lettered list ("A. ...", "B. ...", one per line)"""
        return "\n".join(f"{chr(65 + i)}. {choice}" for i, choice in enumerate(choices))

    def get_max_letter(self, choices):
        """Get the letter of the last choice (e.g. "D" for four choices)"""
        return chr(65 + len(choices) - 1)

    def create_inference_prompt(self, question, choices):
        """
        Create a prompt for inference based on current prompt type

        Args:
            question: The question text
            choices: List of choices

        Returns:
            Formatted prompt string
        """
        formatted_choices = self.format_choices(choices)
        max_letter = self.get_max_letter(choices)

        if self._uses_yaml_format():
            return self._create_yaml_prompt(question, formatted_choices, max_letter)
        return self._create_basic_prompt(question, formatted_choices, max_letter)

    def _create_basic_prompt(self, question, formatted_choices, max_letter):
        """Create a basic prompt asking for just the answer letter"""
        return f"""
QUESTION:
{question}

CHOICES:
{formatted_choices}

Answer with a single letter from A through {max_letter} without any additional explanation or commentary.
"""

    def _create_yaml_prompt(self, question, formatted_choices, max_letter):
        """Create a prompt requesting YAML-formatted reasoning"""
        return f"""
QUESTION:
{question}

CHOICES:
{formatted_choices}

Analyze this question step-by-step and provide a detailed explanation.
Your response MUST be in YAML format as follows:

understanding: |
  <your understanding of what the question is asking>
analysis: |
  <your analysis of each option>
reasoning: |
  <your step-by-step reasoning process>
conclusion: |
  <your final conclusion>
answer: <single letter A through {max_letter}>

The answer field MUST contain ONLY a single character letter.
"""

    def create_training_prompt(self, question, choices):
        """
        Create a prompt for training with the current prompt type

        Args:
            question: The question text
            choices: List of choices

        Returns:
            Formatted prompt string for training
        """
        formatted_choices = self.format_choices(choices)
        max_letter = self.get_max_letter(choices)

        if self._uses_yaml_format():
            return self._create_yaml_training_prompt(question, formatted_choices, max_letter)
        return self._create_basic_training_prompt(question, formatted_choices, max_letter)

    def _create_basic_training_prompt(self, question, formatted_choices, max_letter):
        """Create a basic training prompt (max_letter is accepted but not used by this template)"""
        return f"""
QUESTION:
{question}

CHOICES:
{formatted_choices}

The answer is a single letter (A, B, C, etc.). Only provide ONE character as your answer:
"""

    def _create_yaml_training_prompt(self, question, formatted_choices, max_letter):
        """Create a YAML-formatted training prompt"""
        return f"""
QUESTION:
{question}

CHOICES:
{formatted_choices}

Analyze this question step-by-step and provide a detailed explanation.
Follow the YAML format in your response:

understanding: |
  <your understanding of the question>
analysis: |
  <your analysis of each option>
reasoning: |
  <your reasoning about the correct answer>
conclusion: |
  <your final conclusion>
answer: <single letter A through {max_letter}>
"""

    def set_prompt_type(self, prompt_type):
        """
        Set the prompt type

        The value is stored as given (a "teacher" type is NOT rewritten to "yaml"
        here); prompt creation still renders it with the YAML template.

        Returns:
            self, so calls can be chained
        """
        self.original_type = prompt_type  # Store the original type
        self.prompt_type = prompt_type
        return self

    def is_teacher_mode(self):
        """Check if we're using teacher mode (for training with teacher completions)"""
        return self.original_type == self.TEACHER_REASONED

# Implementation of ResponseParser class

In [21]:
class ResponseParser:
    """
    Parser for model responses with support for different formats
    Extracts answers and reasoning from model outputs
    """

    # Parser modes
    BASIC = "basic"  # Extract single letter answer
    YAML = "yaml"  # Parse YAML formatted response with reasoning

    def __init__(self, parser_mode=BASIC):
        """
        Initialize with specified parser mode

        Args:
            parser_mode: Mode to use for parsing - "basic" or "yaml"
        """
        self.parser_mode = parser_mode

    def parse(self, response_text):
        """
        Parse the model's response according to the current mode

        Args:
            response_text: Raw response text from the model

        Returns:
            Tuple of (answer, reasoning)
        """
        if self.parser_mode == self.YAML:
            return self._parse_yaml_response(response_text)
        return self._parse_basic_response(response_text)

    @staticmethod
    def _extract_letter(response_text):
        """
        Heuristically pull a single answer letter out of free text

        Returns the first stand-alone capital letter (surrounded by whitespace,
        string boundaries or followed by a period); failing that, the upper-cased
        first character when it is alphabetic; otherwise None.
        """
        import re

        standalone = re.search(r"(?:^|\s)([A-Z])(?:\s|$|\.)", response_text)
        if standalone:
            return standalone.group(1)
        if response_text and response_text[0].isalpha():
            return response_text[0].upper()
        return None

    def _parse_basic_response(self, response_text):
        """
        Parse basic response looking for a letter answer

        For basic mode, we look for a single letter (A-Z); no reasoning is
        extracted, so the second tuple element is always an empty string.
        """
        return self._extract_letter(response_text), ""

    def _parse_yaml_response(self, response_text):
        """
        Parse YAML formatted response extracting answer and reasoning

        The answer is taken from an "answer: X" field when present, otherwise from
        the same heuristic used in basic mode. Reasoning is assembled from the
        "understanding", "reasoning" and "conclusion" YAML fields. When the text
        has no "reasoning:" field, or the text is not valid YAML, the full raw
        response is returned as the reasoning.
        """
        import re
        import yaml

        # First try to find answer in YAML format
        yaml_match = re.search(r"answer:\s*([A-Z])", response_text)
        if yaml_match:
            answer = yaml_match.group(1)
        else:
            # Fall back to basic extraction if there is no answer field
            answer = self._extract_letter(response_text)

        if "reasoning:" not in response_text:
            # Use the full response as reasoning if not in YAML format
            return answer, response_text

        # Model output is frequently malformed YAML; a parse error must not abort
        # the evaluation, so keep the raw text as the reasoning instead.
        try:
            yaml_content = yaml.safe_load("---\n" + response_text)
        except yaml.YAMLError:
            return answer, response_text

        reasoning = ""
        if isinstance(yaml_content, dict) and "reasoning" in yaml_content:
            reasoning = yaml_content["reasoning"]

            # Add other YAML fields if available
            if "understanding" in yaml_content:
                reasoning = f"Understanding: {yaml_content['understanding']}\n\n{reasoning}"
            if "conclusion" in yaml_content:
                reasoning = f"{reasoning}\n\nConclusion: {yaml_content['conclusion']}"

        return answer, reasoning

    def set_parser_mode(self, parser_mode):
        """Set the parser mode and return self so calls can be chained"""
        self.parser_mode = parser_mode
        return self

    @classmethod
    def from_prompt_type(cls, prompt_type):
        """
        Create a parser instance with mode matching the prompt type

        Args:
            prompt_type: Prompt type from PromptCreator

        Returns:
            ResponseParser instance with appropriate mode (YAML for the "yaml" and
            "teacher" prompt types, BASIC for everything else)
        """
        if prompt_type in (PromptCreator.YAML_REASONING, PromptCreator.TEACHER_REASONED):
            return cls(parser_mode=cls.YAML)
        return cls(parser_mode=cls.BASIC)

# Implementation of QwenModelHandler class


In [22]:
class QwenModelHandler:
    """Handler for Qwen models with inference and saving capabilities using Unsloth"""

    # Save methods that go through Unsloth's push_to_hub_merged entry point
    _HUB_MERGED_METHODS = ("lora", "merged_16bit", "merged_4bit")

    def __init__(
        self,
        model_name="unsloth/Qwen2.5-7B",
        max_seq_length=768,
        quantization=None,
        device_map="auto",
        cache_dir=None,
    ):
        """
        Initialize model and tokenizer using Unsloth

        Args:
            model_name: Name or path of the model (preferably an unsloth model)
            max_seq_length: Maximum sequence length for the model
            quantization: Quantization type (None, '4bit', '8bit') - for compatibility.
                Only '4bit' changes how the model is loaded; every other value
                (including '8bit') results in load_in_4bit=False.
            device_map: Device mapping strategy (stored on the instance; not
                forwarded to the loader by this class)
            cache_dir: Cache directory for models
        """
        self.model_name = model_name
        self.max_seq_length = max_seq_length
        self.device_map = device_map
        self.quantization = quantization
        self.cache_dir = cache_dir

        # Convert quantization parameter to load_in_4bit parameter for Unsloth
        self.load_in_4bit = quantization == "4bit"

        # Load tokenizer and model
        self.tokenizer, self.model = self._load_model()
        self.response_parser = ResponseParser()

    def _load_model(self):
        """
        Load model and tokenizer with Unsloth for optimization

        Returns:
            Tuple of (tokenizer, model) - note the order is swapped relative to
            what FastLanguageModel.from_pretrained returns.
        """
        from unsloth import FastLanguageModel

        print(f"Loading {self.model_name} with Unsloth, max_seq_length={self.max_seq_length}")

        # Load model and tokenizer with Unsloth; dtype=None lets Unsloth auto-detect
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name=self.model_name,
            max_seq_length=self.max_seq_length,
            dtype=None,
            load_in_4bit=self.load_in_4bit,
            cache_dir=self.cache_dir,
        )

        return tokenizer, model

    @staticmethod
    def _build_generation_kwargs(temperature, max_tokens, use_cache):
        """
        Build the decoding arguments shared by streaming and non-streaming generation

        Sampling-only settings (temperature, min_p) are included only when
        temperature > 0; with greedy decoding they have no effect and only make
        the generation config inconsistent.
        """
        sampling = temperature > 0.0
        kwargs = {
            "max_new_tokens": max_tokens,
            "do_sample": sampling,
            "use_cache": use_cache,  # Important for Unsloth performance
        }
        if sampling:
            kwargs["temperature"] = temperature
            kwargs["min_p"] = 0.1  # Optional: Unsloth recommends this for better quality
        return kwargs

    def generate_with_streaming(
        self, prompt, temperature=0.7, max_tokens=1024, stream=True, use_cache=True
    ):
        """
        Generate completion with optional streaming using Unsloth's optimized inference

        Args:
            prompt: User message content; it is wrapped in a single-turn chat template
            temperature: Sampling temperature; values <= 0 select greedy decoding
            max_tokens: Maximum number of new tokens to generate
            stream: When True, generation runs in a background thread and a
                TextIteratorStreamer is returned
            use_cache: Forwarded to model.generate

        Returns:
            A TextIteratorStreamer yielding text chunks when stream is True,
            otherwise the decoded completion string (prompt tokens removed)
        """
        # Enable faster inference
        from unsloth import FastLanguageModel

        FastLanguageModel.for_inference(self.model)

        # Format as chat
        messages = [{"role": "user", "content": prompt}]
        chat_text = self.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        # Tokenize input
        model_inputs = self.tokenizer([chat_text], return_tensors="pt").to(self.model.device)

        generation_kwargs = self._build_generation_kwargs(temperature, max_tokens, use_cache)
        generation_kwargs["input_ids"] = model_inputs.input_ids
        generation_kwargs["attention_mask"] = model_inputs.attention_mask

        if stream:
            from transformers import TextIteratorStreamer
            import threading

            # Set up streamer
            streamer = TextIteratorStreamer(
                self.tokenizer, skip_prompt=True, skip_special_tokens=True
            )
            generation_kwargs["streamer"] = streamer

            # Start generation in a thread so the caller can consume chunks as they arrive
            thread = threading.Thread(target=self.model.generate, kwargs=generation_kwargs)
            thread.start()

            # Return the streamer that yields text chunks
            return streamer

        # Generate without streaming
        generated_ids = self.model.generate(**generation_kwargs)

        # Decode only the newly generated tokens (everything after the prompt)
        prompt_length = model_inputs.input_ids.shape[1]
        return self.tokenizer.decode(generated_ids[0][prompt_length:], skip_special_tokens=True)

    def calculate_perplexity(self, prompt, answer, temperature=0.0):
        """
        Calculate perplexity for a prompt and answer pair

        The prompt and answer are rendered as a user/assistant chat, and the
        model loss is computed with the full token sequence as labels, so the
        score covers the prompt tokens as well as the answer tokens.

        Args:
            prompt: The input prompt
            answer: The expected answer
            temperature: Accepted for interface compatibility; not used

        Returns:
            Perplexity score (exp of the loss returned by the model)
        """
        import torch

        # Format chat for perplexity calculation
        messages = [{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}]
        chat_text = self.tokenizer.apply_chat_template(messages, tokenize=False)

        # Tokenize the text
        encodings = self.tokenizer(chat_text, return_tensors="pt").to(self.model.device)

        # Calculate loss
        with torch.no_grad():
            outputs = self.model(**encodings, labels=encodings.input_ids)

        # Get loss and calculate perplexity
        neg_log_likelihood = outputs.loss.item()
        perplexity = torch.exp(torch.tensor(neg_log_likelihood)).item()

        return perplexity

    def save_model(self, output_dir, save_method="lora"):
        """
        Save model to disk using Unsloth's optimized methods

        Args:
            output_dir: Directory to save the model (created if missing)
            save_method: Method to use for saving ("lora", "merged_16bit", "merged_4bit", "gguf")

        Returns:
            output_dir

        Raises:
            ValueError: If save_method is not one of the supported values
        """
        import os

        os.makedirs(output_dir, exist_ok=True)

        # Use Unsloth's saving methods
        if save_method == "lora":
            # Save LoRA weights
            self.model.save_pretrained(output_dir)
            self.tokenizer.save_pretrained(output_dir)
        elif save_method in ("merged_16bit", "merged_4bit"):
            # Save merged model in float16 or 4bit
            self.model.save_pretrained_merged(output_dir, self.tokenizer, save_method=save_method)
        elif save_method == "gguf":
            # Save in GGUF format for llama.cpp
            self.model.save_pretrained_gguf(
                output_dir, self.tokenizer, quantization_method="q4_k_m"
            )
        else:
            raise ValueError(f"Unknown save method: {save_method}")

        print(f"Model saved to {output_dir} using method {save_method}")
        return output_dir

    def push_to_hub(self, repo_id, token=None, save_method="lora", private=False):
        """
        Push model to Hugging Face Hub using Unsloth's optimized methods

        Args:
            repo_id: Target repository, e.g. "user/model-name"
            token: Hugging Face access token
            save_method: "lora", "merged_16bit", "merged_4bit" or "gguf"
            private: Whether the repository should be private; forwarded to Unsloth

        Returns:
            URL of the repository on huggingface.co

        Raises:
            ValueError: If save_method is not one of the supported values
        """
        # Use Unsloth's hub methods directly
        if save_method in self._HUB_MERGED_METHODS:
            self.model.push_to_hub_merged(
                repo_id, self.tokenizer, save_method=save_method, token=token, private=private
            )
        elif save_method == "gguf":
            # Push multiple GGUF variants
            self.model.push_to_hub_gguf(
                repo_id,
                self.tokenizer,
                quantization_method=["q4_k_m", "q5_k_m"],
                token=token,
                private=private,
            )
        else:
            raise ValueError(f"Unknown save method: {save_method}")

        hub_url = f"https://huggingface.co/{repo_id}"
        print(f"Model successfully pushed to: {hub_url}")
        return hub_url

# Implementation of MultipleChoiceTester class

In [23]:
class MultipleChoiceTester:
    """Framework for testing Qwen models on multiple choice questions"""

    def __init__(self, model_handler, prompt_creator=None):
        """
        Initialize with model handler and prompt configuration

        Args:
            model_handler: The QwenModelHandler instance
            prompt_creator: Optional PromptCreator instance (will create one if not provided)
        """
        self.model_handler = model_handler
        self.prompt_creator = prompt_creator or PromptCreator(PromptCreator.BASIC)
        # Create a response parser matching the prompt type
        self.response_parser = ResponseParser.from_prompt_type(self.prompt_creator.prompt_type)

    def infer_example(
        self,
        example,
        temperature=0.7,
        max_tokens=1024,
        prompt_type=None,
        stream=False,
        use_cache=False,
    ):
        """
        Mode 1: Inference on a single example for visualization/demonstration

        Args:
            example: Single example to infer (dict with question, choices, etc.)
            temperature: Sampling temperature for generation
            max_tokens: Maximum tokens to generate
            prompt_type: Optional override for prompt type
            stream: Whether to stream the output

        Returns:
            Dictionary with prediction and metrics
        """
        # Allow temporary override of prompt type
        original_prompt_type = None
        if prompt_type is not None:
            original_prompt_type = self.prompt_creator.prompt_type
            self.prompt_creator.set_prompt_type(prompt_type)
            # Update response parser to match prompt type
            self.response_parser = ResponseParser.from_prompt_type(prompt_type)

        # Prepare data
        question = example["question"]

        # Handle different formats of choices
        if isinstance(example["choices"], list):
            choices = example["choices"]
        elif isinstance(example["choices"], str) and example["choices"].startswith("["):
            # Parse string representation of list
            import ast

            choices = (
                ast.literal_eval(example["choices"])
                if "[" in example["choices"]
                else example["choices"].split(",")
            )
        else:
            choices = str(example["choices"]).split(",")

        # Generate the prompt using prompt creator
        prompt = self.prompt_creator.create_inference_prompt(question, choices)

        # Start timing
        start_time = time.time()

        if stream:
            # Use streaming generation
            streamer = self.model_handler.generate_with_streaming(
                prompt=prompt,
                temperature=temperature,
                max_tokens=max_tokens,
                stream=stream,
                use_cache=use_cache,
            )

            # Collect output from streamer
            raw_response = ""
            print("Model response:")
            for text_chunk in streamer:
                print(text_chunk, end="", flush=True)
                raw_response += text_chunk
            print("\n")
        else:
            # Generate without streaming
            raw_response = self.model_handler.generate_with_streaming(
                prompt=prompt,
                temperature=temperature,
                max_tokens=max_tokens,
                stream=stream,
                use_cache=use_cache,
            )

        response_time = time.time() - start_time

        # Parse the response using the response parser
        predicted_answer, reasoning = self.response_parser.parse(raw_response)

        # Prepare results
        result = {
            "question": question,
            "choices": choices,
            "predicted_answer": predicted_answer,
            "reasoning": reasoning,
            "response_time": response_time,
            "raw_response": raw_response,
            "prompt_type": self.prompt_creator.prompt_type,
        }

        # Add task_id if available
        if "task_id" in example:
            result["task_id"] = example["task_id"]

        # Calculate metrics if label is provided
        if "answer" in example:
            label = example["answer"]
            result["correct_answer"] = label
            result["is_correct"] = predicted_answer == label

            # Calculate perplexity if requested
            if hasattr(self.model_handler, "calculate_perplexity"):
                perplexity = self.model_handler.calculate_perplexity(prompt, raw_response)
                result["perplexity"] = perplexity

        # Restore original prompt type if it was overridden
        if original_prompt_type is not None:
            self.prompt_creator.set_prompt_type(original_prompt_type)
            # Restore the original response parser
            self.response_parser = ResponseParser.from_prompt_type(original_prompt_type)

        return result

    def infer_batch(
        self, examples, temperature=0.7, max_tokens=1024, prompt_type=None, batch_size=4
    ):
        """
        Mode 2: Inference on a batch of examples

        Args:
            examples: List of examples to infer
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate
            prompt_type: Optional override for prompt type
            batch_size: Size of batches for processing

        Returns:
            List of result dictionaries and summary metrics
        """
        # Allow temporary override of prompt type
        original_prompt_type = None
        if prompt_type is not None:
            original_prompt_type = self.prompt_creator.prompt_type
            self.prompt_creator.set_prompt_type(prompt_type)
            # Update response parser to match prompt type
            self.response_parser = ResponseParser.from_prompt_type(prompt_type)

        # Prepare all prompts
        prompts = []
        metadata = []

        for i, example in enumerate(examples):
            # Extract data
            question = example["question"]

            # Handle different formats of choices
            if isinstance(example["choices"], list):
                choices = example["choices"]
            elif isinstance(example["choices"], str) and example["choices"].startswith("["):
                # Parse string representation of list
                import ast

                choices = (
                    ast.literal_eval(example["choices"])
                    if "[" in example["choices"]
                    else example["choices"].split(",")
                )
            else:
                choices = str(example["choices"]).split(",")

            # Generate the prompt using prompt creator
            prompt = self.prompt_creator.create_inference_prompt(question, choices)
            prompts.append(prompt)

            # Store metadata for later
            meta = {
                "question": question,
                "choices": choices,
                "index": i,
            }

            # Add label if available
            if "answer" in example:
                meta["label"] = example["answer"]

            if "task_id" in example:
                meta["task_id"] = example["task_id"]

            metadata.append(meta)

        # Process in batches
        results = []
        correct_count = 0
        total_count = 0
        perplexities = []

        for i in range(0, len(prompts), batch_size):
            batch_prompts = prompts[i : i + batch_size]
            batch_meta = metadata[i : i + batch_size]

            # Process batch
            start_time = time.time()
            batch_responses = []

            for prompt in batch_prompts:
                response = self.model_handler.generate_with_streaming(
                    prompt=prompt, temperature=temperature, max_tokens=max_tokens, stream=False
                )
                batch_responses.append(response)

            batch_time = time.time() - start_time

            # Process each response in the batch
            for j, (response, meta) in enumerate(zip(batch_responses, batch_meta)):
                # Parse response
                predicted_answer, reasoning = self.response_parser.parse(response)

                # Create result
                result = {
                    "question": meta["question"],
                    "choices": meta["choices"],
                    "predicted_answer": predicted_answer,
                    "reasoning": reasoning,
                    "raw_response": response,
                    "prompt_type": self.prompt_creator.prompt_type,
                    "response_time": batch_time / len(batch_prompts),  # Approximate individual time
                }

                # Add task_id if available
                if "task_id" in meta:
                    result["task_id"] = meta["task_id"]

                # Add metrics if label available
                if "label" in meta:
                    label = meta["label"]
                    result["correct_answer"] = label
                    result["is_correct"] = predicted_answer == label

                    # Update counts for accuracy
                    total_count += 1
                    if result["is_correct"]:
                        correct_count += 1

                    # Calculate perplexity if possible
                    if hasattr(self.model_handler, "calculate_perplexity"):
                        prompt = batch_prompts[j]
                        perplexity = self.model_handler.calculate_perplexity(prompt, response)
                        result["perplexity"] = perplexity
                        perplexities.append(perplexity)

                results.append(result)

        # Calculate aggregate metrics
        summary_metrics = {}
        if total_count > 0:
            summary_metrics["accuracy"] = correct_count / total_count
            summary_metrics["correct_count"] = correct_count
            summary_metrics["total_count"] = total_count

            if perplexities:
                summary_metrics["avg_perplexity"] = sum(perplexities) / len(perplexities)
                summary_metrics["min_perplexity"] = min(perplexities)
                summary_metrics["max_perplexity"] = max(perplexities)

        # Restore original prompt type if it was overridden
        if original_prompt_type is not None:
            self.prompt_creator.set_prompt_type(original_prompt_type)
            # Restore the original response parser
            self.response_parser = ResponseParser.from_prompt_type(original_prompt_type)

        return results, summary_metrics

    def evaluate_dataset(
        self,
        dataset,
        temperature=0.7,
        max_tokens=1024,
        num_examples=None,
        verbose=True,
        prompt_type=None,
        batch_size=4,
        log_to_wandb=False,
    ):
        """
        Mode 3: Inference on a whole dataset with metrics calculation

        Args:
            dataset: Dataset to evaluate
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate
            num_examples: Number of examples to evaluate (None for all)
            verbose: Whether to print progress information
            prompt_type: Override the prompt type for this evaluation
            batch_size: Size of batches for processing
            log_to_wandb: Whether to log results to wandb

        Returns:
            Summary dictionary with results and metrics
        """
        # Allow overriding the prompt type for this evaluation
        original_prompt_type = self.prompt_creator.prompt_type
        if prompt_type is not None:
            self.prompt_creator.set_prompt_type(prompt_type)
            # Update response parser to match prompt type
            self.response_parser = ResponseParser.from_prompt_type(prompt_type)

        # Select subset if specified
        if num_examples is not None:
            dataset = dataset.select(range(min(num_examples, len(dataset))))

        results = []
        correct_count = 0
        total_count = 0
        perplexities = []

        # Process examples in batches
        for i in range(0, len(dataset), batch_size):
            batch_examples = dataset[i : i + batch_size]

            if verbose:
                batch_desc = (
                    f"Batch {i//batch_size + 1}/{(len(dataset) + batch_size - 1) // batch_size}"
                )
                print(f"\nProcessing {batch_desc} with {len(batch_examples)} examples...")

            # Infer batch
            batch_results, batch_metrics = self.infer_batch(
                examples=batch_examples,
                temperature=temperature,
                max_tokens=max_tokens,
                batch_size=batch_size,
            )

            # Update metrics
            results.extend(batch_results)
            if "correct_count" in batch_metrics:
                correct_count += batch_metrics["correct_count"]
                total_count += batch_metrics["total_count"]

                if verbose:
                    batch_accuracy = batch_metrics["accuracy"]
                    overall_accuracy = correct_count / total_count
                    print(
                        f"Batch accuracy: {batch_accuracy:.2%}, Overall: {overall_accuracy:.2%} ({correct_count}/{total_count})"
                    )

            # Collect perplexities
            if "avg_perplexity" in batch_metrics:
                for result in batch_results:
                    if "perplexity" in result:
                        perplexities.append(result["perplexity"])

        # Calculate final accuracy
        accuracy = correct_count / total_count if total_count > 0 else 0.0

        if verbose:
            prompt_type_str = self.prompt_creator.prompt_type
            print(
                f"\nFinal accuracy with {prompt_type_str} prompts: {accuracy:.2%} ({correct_count}/{total_count})"
            )
            if perplexities:
                avg_perplexity = sum(perplexities) / len(perplexities)
                print(f"Average perplexity: {avg_perplexity:.4f}")

        # Prepare comprehensive summary
        summary = {
            "accuracy": accuracy,
            "correct_count": correct_count,
            "total_count": total_count,
            "prompt_type": self.prompt_creator.prompt_type,
            "results": results,
        }

        # Add perplexity metrics if available
        if perplexities:
            summary["avg_perplexity"] = sum(perplexities) / len(perplexities)
            summary["min_perplexity"] = min(perplexities)
            summary["max_perplexity"] = max(perplexities)

        # Log results to wandb if requested
        if log_to_wandb and wandb.run is not None:
            metrics = {
                "test/accuracy": accuracy,
                "test/correct_count": correct_count,
                "test/total_count": total_count,
            }
            if perplexities:
                metrics["test/avg_perplexity"] = summary["avg_perplexity"]
                metrics["test/min_perplexity"] = summary["min_perplexity"]
                metrics["test/max_perplexity"] = summary["max_perplexity"]

            wandb.log(metrics)

            # Create a table of results for visualization if task_id exists
            if "task_id" in dataset.features:
                columns = [
                    "task_id",
                    "question",
                    "correct_answer",
                    "predicted_answer",
                    "is_correct",
                ]
                table = wandb.Table(columns=columns)

                for res in results[: min(100, len(results))]:  # Limit to 100 examples
                    table.add_data(
                        res.get("task_id", "unknown"),
                        res["question"][:100] + "...",
                        res.get("correct_answer", ""),
                        res.get("predicted_answer", ""),
                        res.get("is_correct", False),
                    )

                wandb.log({"test_samples": table})

        # Restore original prompt type
        self.prompt_creator.set_prompt_type(original_prompt_type)
        # Restore the original response parser
        self.response_parser = ResponseParser.from_prompt_type(original_prompt_type)

        return summary

    def save_results(self, results, output_dir="./results"):
        """Save an evaluation summary to a timestamped JSON file.

        Args:
            results: Summary dictionary. The keys "accuracy", "correct_count",
                "total_count" and "prompt_type" are copied (with defaults when
                absent), the perplexity aggregates are copied when present, and
                "results" is read as the list of per-example result dicts.
            output_dir: Directory for the output file; created if it does not exist.

        Returns:
            Path of the JSON file that was written.
        """
        import ast

        os.makedirs(output_dir, exist_ok=True)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        results_file = os.path.join(output_dir, f"results_{timestamp}.json")

        # Top-level metrics; defaults keep the file well-formed for partial summaries
        serializable_results = {
            "accuracy": results.get("accuracy", 0.0),
            "correct_count": results.get("correct_count", 0),
            "total_count": results.get("total_count", 0),
            "timestamp": timestamp,
            "prompt_type": results.get("prompt_type", "unknown"),
        }

        # Add perplexity metrics if available
        for metric_name in ("avg_perplexity", "min_perplexity", "max_perplexity"):
            if metric_name in results:
                serializable_results[metric_name] = results[metric_name]

        # Process individual results; a summary without "results" yields an empty list
        individual_results = []
        for result in results.get("results", []):
            result_copy = dict(result)
            # Skip perplexity in individual results to save space
            result_copy.pop("perplexity", None)

            # Choices may arrive as the string repr of a list; parse it back when possible.
            # Results without a "choices" entry are saved unchanged.
            choices = result_copy.get("choices")
            if isinstance(choices, str):
                try:
                    result_copy["choices"] = ast.literal_eval(choices)
                except (SyntaxError, ValueError, TypeError):
                    # Keep the raw string if it is not a valid Python literal
                    pass

            individual_results.append(result_copy)
        serializable_results["individual_results"] = individual_results

        # Serialize before opening the file so a failure cannot leave a truncated file behind
        payload = json.dumps(serializable_results, indent=2)
        with open(results_file, "w", encoding="utf-8") as f:
            f.write(payload)

        print(f"Results saved to {results_file}")
        return results_file

# QwenTrainer class implementation:
* The implementation lives in `src/training/trainer.py`.
* This notebook is a guide to running inference with the latest model from the HuggingFace Hub.
* To reproduce the training, see `../train.sh`.
* For more details, see the `README.md` file.
* The dataset is available at `https://huggingface.co/datasets/tuandunghcmut/coding-mcq-reasoning`.
* For the legacy model, see `LEGACY.md`.


# Code for loading the latest model from HuggingFace Hub

In [24]:
# Load the latest model from HuggingFace Hub and wrap it in a MultipleChoiceTester.
# On success this cell defines `lastest_model_handler_hub`, `prompt_creator` and
# `latest_tester_hub`, which the inference cells below rely on.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch
import os

# NOTE(review): PeftModel is imported but not referenced anywhere in this cell.

# Set HuggingFace Hub credentials if available (None when HF_TOKEN is unset)
hf_token = os.environ.get("HF_TOKEN")

# Model ID on HuggingFace Hub
hub_model_id = "tuandunghcmut/Qwen25_Coder_MultipleChoice_v4"

print(f"Loading model from HuggingFace Hub: {hub_model_id}")

# Load the model and tokenizer
try:
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(hub_model_id, token=hf_token, trust_remote_code=True)

    # Load the model directly through transformers in bfloat16, with automatic device placement.
    # NOTE(review): `model` and `tokenizer` are not used again in this cell; the handler below
    # is built from `hub_model_id` and, judging by the recorded "Loading ... with Unsloth" output,
    # presumably loads the checkpoint a second time - confirm whether this direct load is needed.
    model = AutoModelForCausalLM.from_pretrained(
        hub_model_id,
        token=hf_token,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
    )

    # Create a model handler from the Hub model ID (it does not receive the `model` /
    # `tokenizer` objects loaded above). QwenModelHandler is defined earlier in this notebook.
    # Other quantization values tried here: "4bit", "16bit".
    lastest_model_handler_hub = QwenModelHandler(
        model_name=hub_model_id, max_seq_length=2048, quantization="8bit"
    )

    # Switch the handler's model to Unsloth inference mode
    from unsloth.models import FastLanguageModel

    FastLanguageModel.for_inference(lastest_model_handler_hub.model)
    # The tester uses the YAML-reasoning prompt format
    prompt_creator = PromptCreator(PromptCreator.YAML_REASONING)
    # Create a tester with the loaded model
    latest_tester_hub = MultipleChoiceTester(
        lastest_model_handler_hub, prompt_creator=prompt_creator
    )

    print("Successfully loaded model from HuggingFace Hub!")

# Any failure is reported and swallowed so the notebook keeps running.
# NOTE(review): on failure `latest_tester_hub` is not defined by this cell, so cells that use it
# will fail unless it was created elsewhere - confirm that a local fallback actually exists.
except Exception as e:
    print(f"Error loading model from HuggingFace Hub: {e}")
    print("Continuing with locally trained model...")

Loading model from HuggingFace Hub: tuandunghcmut/Qwen25_Coder_MultipleChoice_v4


INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).


Loading tuandunghcmut/Qwen25_Coder_MultipleChoice_v4 with Unsloth, max_seq_length=2048
==((====))==  Unsloth 2025.3.19: Fast Qwen2 patching. Transformers: 4.50.3.
   \\   /|    Tesla V100-SXM2-32GB. Num GPUs = 1. Max memory: 31.733 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.0. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Successfully loaded model from HuggingFace Hub!


# Code for fast streaming inference and 10 coding examples

In [25]:
import yaml
from IPython.display import Markdown, display
import time

# Define test examples with varied correct answers.
# Each entry holds:
#   "question": the question text
#   "choices":  the four answer options, in display order
#   "answer":   the letter of the correct option ("A" is the first choice, "B" the second, ...),
#               matching the lettering process_example applies when rendering the choices
# NOTE(review): the last example keys std::auto_ptr as "not valid"; it was a standard smart
# pointer until its removal in C++17, so the key assumes a C++17-or-later reading.
examples = [
    {
        "question": "Which of the following is NOT a valid way to initialize a variable in C++?",
        "choices": ["int x = 5;", "int x(5);", "int x{5};", "int x := 5;"],
        "answer": "D",
    },
    {
        "question": "In C, what does the 'malloc' function do?",
        "choices": [
            "Frees allocated memory",
            "Allocates memory dynamically",
            "Manages automatic memory",
            "Moves allocated memory",
        ],
        "answer": "B",
    },
    {
        "question": "Which C++ keyword is used to define a class template?",
        "choices": ["class", "virtual", "template", "typename"],
        "answer": "C",
    },
    {
        "question": "What is the correct way to access a member of a structure through a pointer in C?",
        "choices": ["pointer.member", "pointer->member", "pointer::member", "pointer@member"],
        "answer": "B",
    },
    {
        "question": "Which of the following is NOT a storage class specifier in C?",
        "choices": ["static", "extern", "register", "virtual"],
        "answer": "D",
    },
    {
        "question": "What does the 'const' keyword signify in C++?",
        "choices": [
            "The variable can be modified indirectly",
            "The variable cannot be modified",
            "The variable is stored in constant memory",
            "The variable is initialized at compile time",
        ],
        "answer": "B",
    },
    {
        "question": "Which C++ feature provides runtime polymorphism?",
        "choices": ["Virtual functions", "Templates", "Operator overloading", "Friend functions"],
        "answer": "A",
    },
    {
        "question": "In C++, what is the purpose of the 'new' operator?",
        "choices": [
            "To create a new class",
            "To allocate memory dynamically",
            "To initialize a new variable",
            "To create a new scope",
        ],
        "answer": "B",
    },
    {
        "question": "What is the correct way to declare a function pointer in C?",
        "choices": [
            "void (*func)(int);",
            "void *func(int);",
            "func->void(int);",
            "pointer void func(int);",
        ],
        "answer": "A",
    },
    {
        "question": "Which of these is NOT a valid C++ smart pointer type?",
        "choices": ["std::unique_ptr", "std::shared_ptr", "std::weak_ptr", "std::auto_ptr"],
        "answer": "D",
    },
]


# Render one example as Markdown, stream the model's answer, then show the verdict
def process_example(example, index):
    """Display a question, run streaming inference on it, and display the outcome.

    Args:
        example: Dict with "question", "choices" (list of options) and "answer" keys.
        index: Zero-based position of the example; shown to the reader as index + 1.
    """
    # Question header followed by one lettered bullet per choice (A, B, C, ...)
    question_md = (
        f"## Example {index+1}\n\n"
        f"**Question:** {example['question']}\n\n"
        "**Choices:**\n"
    )
    question_md += "".join(
        f"- **{chr(65+i)}.** {choice}\n" for i, choice in enumerate(example["choices"])
    )
    display(Markdown(question_md))

    # Every example from the fourth onward (index >= 3) is round-tripped through YAML
    # before inference; earlier examples are passed to the tester as-is.
    example_dict = yaml.safe_load(yaml.safe_dump(example)) if index >= 3 else example

    # Start streaming response
    display(Markdown("**Model Response (streaming):**"))
    result = latest_tester_hub.infer_example(
        example_dict,
        temperature=0.00001,
        stream=True,
        use_cache=False,
    )

    # Summarise prediction vs. ground truth, then append the model's reasoning
    verdict_md = (
        f"**Predicted Answer:** {result['predicted_answer']}\n\n"
        f"**Correct Answer:** {example['answer']}\n\n"
        "**Reasoning:**\n\n"
    )
    try:
        verdict_md = verdict_md + result["reasoning"]
    except Exception as e:
        # A missing or non-string reasoning field is shown inline instead of aborting the run
        verdict_md = verdict_md + f"Error: {e}"

    display(Markdown(verdict_md))
    display(Markdown("---"))


# Process all examples in list order; the zero-based position is passed as the example index,
# which process_example uses for the displayed example number.
for i, example in enumerate(examples):
    process_example(example, i)

## Example 1

**Question:** Which of the following is NOT a valid way to initialize a variable in C++?

**Choices:**
- **A.** int x = 5;
- **B.** int x(5);
- **C.** int x{5};
- **D.** int x := 5;


**Model Response (streaming):**

Model response:
understanding: |
  The question asks which method is not a valid way to initialize a variable in C++. This involves understanding how different syntax for initializing variables works in C++.
analysis: |
  A. `int x = 5;` - This is correct because it uses the assignment initializer syntax, which is standard for initializing variables in C++.
  B. `int x(5);` - This is incorrect because the constructor syntax is used for creating objects, not for initializing variables.
  C. `int x{5};` - This is correct because it uses curly braces, which is the modern C++ initialization syntax for variables.
  D. `int x := 5;` - This is incorrect because the colon-initializer syntax was used in older versions of C++ (before C++11) but has been deprecated and removed in newer standards.
reasoning: |
  The key distinction lies in the syntax used for initialization. Option A uses the assignment initializer, which is correct. Options B and D use constructs that were part of earlier C++ sta

**Predicted Answer:** D

**Correct Answer:** D

**Reasoning:**

Understanding: The question asks which method is not a valid way to initialize a variable in C++. This involves understanding how different syntax for initializing variables works in C++.


The key distinction lies in the syntax used for initialization. Option A uses the assignment initializer, which is correct. Options B and D use constructs that were part of earlier C++ standards but have been phased out. Option C uses the modern C++ brace-initializer, which is correct. Therefore, options B and D are invalid methods for initializing variables in C++.


Conclusion: Answer D is correct because the colon-initializer syntax is no longer supported in modern C++, making it invalid for initializing variables.


---

## Example 2

**Question:** In C, what does the 'malloc' function do?

**Choices:**
- **A.** Frees allocated memory
- **B.** Allocates memory dynamically
- **C.** Manages automatic memory
- **D.** Moves allocated memory


**Model Response (streaming):**

Model response:
understanding: |
  The question asks about the purpose of the 'malloc' function in C programming, which involves understanding how it allocates memory dynamically.
analysis: |
  A. Incorrect. 'free' is used to deallocate memory, not allocate it.
  B. Correct. 'malloc' stands for "memory allocation" and is used to request dynamic memory from the system.
  C. Incorrect. 'calloc' and 'realloc' manage memory allocation, but 'malloc' itself does not.
  D. Incorrect. Memory movement is typically handled by functions like 'memmove', not 'malloc'.
reasoning: |
  The 'malloc' function is specifically designed to allocate a block of memory of a specified size from the heap. This allows programs to dynamically allocate memory as needed during runtime, unlike static storage classes that require explicit declaration and definition.
conclusion: |
  Answer B is correct because 'malloc' is explicitly used to allocate memory dynamically, making it the correct choice among the options pr

**Predicted Answer:** B

**Correct Answer:** B

**Reasoning:**

Understanding: The question asks about the purpose of the 'malloc' function in C programming, which involves understanding how it allocates memory dynamically.


The 'malloc' function is specifically designed to allocate a block of memory of a specified size from the heap. This allows programs to dynamically allocate memory as needed during runtime, unlike static storage classes that require explicit declaration and definition.


Conclusion: Answer B is correct because 'malloc' is explicitly used to allocate memory dynamically, making it the correct choice among the options provided.


---

## Example 3

**Question:** Which C++ keyword is used to define a class template?

**Choices:**
- **A.** class
- **B.** virtual
- **C.** template
- **D.** typename


**Model Response (streaming):**

Model response:
understanding: |
  The question asks which C++ keyword is specifically used to define a class template. Class templates allow for the creation of generic classes that can work with different data types.
analysis: |
  A. class - This keyword is used to define a new user-defined type or class, but it does not specifically denote a class template.
  B. virtual - This keyword is used in inheritance to specify that a function in a derived class overrides a function in its base class, not to define a class template.
  C. template - This keyword is used to declare a class, function, or variable as a template, allowing it to be instantiated with various data types.
  D. typename - This keyword is used within class templates to explicitly state that a name is a type, often used with nested types like class templates.
reasoning: |
  The correct keyword to define a class template is "template." It allows developers to create reusable code snippets that can operate on different dat

**Predicted Answer:** C

**Correct Answer:** C

**Reasoning:**

Understanding: The question asks which C++ keyword is specifically used to define a class template. Class templates allow for the creation of generic classes that can work with different data types.


The correct keyword to define a class template is "template." It allows developers to create reusable code snippets that can operate on different data types. While "class" and "typename" are related concepts in C++, they do not define class templates; "virtual" pertains to inheritance, and "typename" is used in templates to clarify type names.


Conclusion: Answer C is correct because "template" is the specific keyword used to define a class template in C++.


---

## Example 4

**Question:** What is the correct way to access a member of a structure through a pointer in C?

**Choices:**
- **A.** pointer.member
- **B.** pointer->member
- **C.** pointer::member
- **D.** pointer@member


**Model Response (streaming):**

Model response:
understanding: |
  The question asks how to correctly use a pointer to access a member of a structure in C. This involves understanding the syntax used for dereferencing pointers to access structure members.
analysis: |
  A. pointer.member - Incorrect because it uses dot notation, which requires the structure type or an instance to access members.
  B. pointer->member - Correct because it uses arrow notation, which dereferences the pointer to access the member of the structure pointed to.
  C. pointer::member - Incorrect due to incorrect scope resolution syntax; C does not support double colons for accessing structure members through a pointer.
  D. pointer@member - Incorrect because there is no such operator as @ in C for accessing structure members through a pointer.
reasoning: |
  In C, when working with structures through pointers, you need to dereference the pointer using the arrow (->) operator to access the members of the structure. This allows you to navigate th

**Predicted Answer:** B

**Correct Answer:** B

**Reasoning:**

Understanding: The question asks how to correctly use a pointer to access a member of a structure in C. This involves understanding the syntax used for dereferencing pointers to access structure members.


In C, when working with structures through pointers, you need to dereference the pointer using the arrow (->) operator to access the members of the structure. This allows you to navigate through the memory location that the pointer points to.


Conclusion: Answer B is correct because it uses the arrow operator to dereference the pointer and access the structure member, which is the proper method in C for this operation.


---

## Example 5

**Question:** Which of the following is NOT a storage class specifier in C?

**Choices:**
- **A.** static
- **B.** extern
- **C.** register
- **D.** virtual


**Model Response (streaming):**

Model response:
understanding: |
  The question asks which option is not a storage class specifier in C programming language. Storage classes determine how variables or functions are stored in memory and affect their scope and lifetime.
analysis: |
  A. static - This storage class specifier is used to declare a variable with local storage duration, meaning it retains its value between function calls.
  B. extern - This specifies that a variable or function is defined elsewhere, allowing multiple files to share data.
  C. register - This allows a variable to be stored in CPU registers for faster access, but it does not define a storage class.
  D. virtual - This is not a standard storage class specifier in C; it might be confused with virtual functions in object-oriented programming contexts.
reasoning: |
  The correct answer must be the one that does not belong to any of the standard storage class specifiers in C. Option C, "register," is incorrect because it does not define a storage 

**Predicted Answer:** C

**Correct Answer:** D

**Reasoning:**

Understanding: The question asks which option is not a storage class specifier in C programming language. Storage classes determine how variables or functions are stored in memory and affect their scope and lifetime.


The correct answer must be the one that does not belong to any of the standard storage class specifiers in C. Option C, "register," is incorrect because it does not define a storage class; rather, it suggests storing variables in CPU registers. Options A, B, and D are all valid storage class specifiers in C.


Conclusion: Answer C is correct because "register" is not a storage class specifier; instead, it pertains to optimizing variable storage in CPU registers.


---

## Example 6

**Question:** What does the 'const' keyword signify in C++?

**Choices:**
- **A.** The variable can be modified indirectly
- **B.** The variable cannot be modified
- **C.** The variable is stored in constant memory
- **D.** The variable is initialized at compile time


**Model Response (streaming):**

Model response:
understanding: |
  The question asks about the significance of the 'const' keyword in C++, specifically regarding its ability to prevent a variable from being modified.
analysis: |
  A. Incorrect. The 'const' keyword does not imply indirect modification; it restricts direct modification.
  B. Correct. The 'const' keyword makes a variable immutable once assigned, preventing any further modification.
  C. Incorrect. 'const' does not specify where a variable is stored; it only affects its modifiability.
  D. Incorrect. While 'const' can help optimize code by storing variables in read-only memory, it does not inherently "initialize" variables at compile time.
reasoning: |
  The 'const' keyword in C++ is used to declare a variable that cannot be changed after initialization. This prevents accidental modifications, which is a key feature of making variables immutable. Option B accurately describes this behavior, while the other options misrepresent the primary function of 'co

**Predicted Answer:** B

**Correct Answer:** B

**Reasoning:**

Understanding: The question asks about the significance of the 'const' keyword in C++, specifically regarding its ability to prevent a variable from being modified.


The 'const' keyword in C++ is used to declare a variable that cannot be changed after initialization. This prevents accidental modifications, which is a key feature of making variables immutable. Option B accurately describes this behavior, while the other options misrepresent the primary function of 'const'.


Conclusion: Answer B is correct because the 'const' keyword ensures that a variable's value cannot be altered once set, thus preventing modification directly.


---

## Example 7

**Question:** Which C++ feature provides runtime polymorphism?

**Choices:**
- **A.** Virtual functions
- **B.** Templates
- **C.** Operator overloading
- **D.** Friend functions


**Model Response (streaming):**

Model response:
understanding: |
  The question asks which C++ feature supports runtime polymorphism, a key concept in object-oriented programming where different classes can be treated as instances of the same base class.
analysis: |
  A. Virtual functions allow for runtime polymorphism by enabling a method to be overridden in derived classes while still calling the correct implementation based on the pointer or reference type.
  B. Templates enable compile-time polymorphism by allowing code reuse across different data types but do not support runtime polymorphism.
  C. Operator overloading allows new operators to be defined for user-defined types but does not inherently support runtime polymorphism.
  D. Friend functions allow one function to access the private and protected members of another class but do not support runtime polymorphism.
reasoning: |
  Virtual functions are specifically designed to achieve runtime polymorphism by enabling a base class pointer to call a derived clas

**Predicted Answer:** A

**Correct Answer:** A

**Reasoning:**

Understanding: The question asks which C++ feature supports runtime polymorphism, a key concept in object-oriented programming where different classes can be treated as instances of the same base class.


Virtual functions are specifically designed to achieve runtime polymorphism by enabling a base class pointer to call a derived class method. This is possible because the actual method called depends on the object's type at runtime, unlike static binding that occurs at compile time with templates or operator overloading.


Conclusion: Answer A is correct because virtual functions provide the necessary mechanism for runtime polymorphism by allowing derived class methods to be invoked dynamically based on the object's type.


---

## Example 8

**Question:** In C++, what is the purpose of the 'new' operator?

**Choices:**
- **A.** To create a new class
- **B.** To allocate memory dynamically
- **C.** To initialize a new variable
- **D.** To create a new scope


**Model Response (streaming):**

Model response:
understanding: |
  The question asks about the primary function of the 'new' operator in C++. Understanding its role involves recognizing how it interacts with memory allocation.
analysis: |
  A. Incorrect. Creating a new class is done using the 'class' keyword or struct, not 'new'.
  B. Correct. The 'new' operator is used to request dynamic memory allocation from the system, allowing for the creation of objects that may not exist at compile time.
  C. Incorrect. Initializing a variable is typically done using assignment operators like '=' or constructors, not 'new'.
  D. Incorrect. A scope is managed by 'static', 'extern', or block-level declarations, not the 'new' operator.
reasoning: |
  The 'new' operator is specifically designed to allocate memory for an object during runtime. This allows developers to create objects whose size or location are determined at execution time, which is essential for implementing features like dynamic data structures and polymorphism.
c

**Predicted Answer:** B

**Correct Answer:** B

**Reasoning:**

Understanding: The question asks about the primary function of the 'new' operator in C++. Understanding its role involves recognizing how it interacts with memory allocation.


The 'new' operator is specifically designed to allocate memory for an object during runtime. This allows developers to create objects whose size or location are determined at execution time, which is essential for implementing features like dynamic data structures and polymorphism.


Conclusion: Answer B is correct because the 'new' operator's primary purpose is to allocate memory dynamically, enabling flexible memory management in C++ programs.


---

## Example 9

**Question:** What is the correct way to declare a function pointer in C?

**Choices:**
- **A.** void (*func)(int);
- **B.** void *func(int);
- **C.** func->void(int);
- **D.** pointer void func(int);


**Model Response (streaming):**

Model response:
understanding: |
  The question asks for the correct syntax to declare a function pointer in C. Function pointers are used to store addresses of functions that can be called later.
analysis: |
  A. void (*func)(int); - Correct syntax for declaring a function pointer. It specifies the return type (void) and the argument list (int), followed by the parentheses indicating it's a function pointer.
  B. void *func(int); - Incorrect because it lacks the parentheses around the parameter list, which is necessary for defining a function pointer.
  C. func->void(int); - Incorrect due to incorrect use of arrow notation and missing parentheses, which do not define a function pointer.
  D. pointer void func(int); - Incorrect because "pointer" is not a valid keyword in C for function pointers; the correct keyword is "function".
reasoning: |
  To declare a function pointer, you must specify both the return type and the argument list within parentheses. Option A correctly follows this 

**Predicted Answer:** A

**Correct Answer:** A

**Reasoning:**

Understanding: The question asks for the correct syntax to declare a function pointer in C. Function pointers are used to store addresses of functions that can be called later.


To declare a function pointer, you must specify both the return type and the argument list within parentheses. Option A correctly follows this pattern, while the other options fail to include these essential components.


Conclusion: Answer A is correct because it properly declares a function pointer using the required syntax: return type, parentheses enclosing the argument list, and the asterisk (*) to denote it as a pointer.


---

## Example 10

**Question:** Which of these is NOT a valid C++ smart pointer type?

**Choices:**
- **A.** std::unique_ptr
- **B.** std::shared_ptr
- **C.** std::weak_ptr
- **D.** std::auto_ptr


**Model Response (streaming):**

Model response:
understanding: |
  The question asks which option is not a valid C++ smart pointer type. Smart pointers are used to manage memory more efficiently and safely in C++. They help prevent memory leaks and ensure that resources are freed when they are no longer needed.
analysis: |
  A. std::unique_ptr - This is a unique pointer, which owns the object it points to and deletes it when no longer needed.
  B. std::shared_ptr - This is a shared pointer, allowing multiple pointers to point to the same object and automatically managing resource ownership.
  C. std::weak_ptr - This is a weak pointer, which holds a non-owning reference to an object managed by a shared pointer. It does not affect the lifetime of the object it points to.
  D. std::auto_ptr - This was a legacy C++ smart pointer that owned the object it pointed to and deleted it upon destruction. However, it has been deprecated due to its inefficiencies and lack of safety features compared to modern smart pointers.
reaso

**Predicted Answer:** D

**Correct Answer:** D

**Reasoning:**

Understanding: The question asks which option is not a valid C++ smart pointer type. Smart pointers are used to manage memory more efficiently and safely in C++. They help prevent memory leaks and ensure that resources are freed when they are no longer needed.


The key distinction lies in the behavior of std::auto_ptr, which is outdated and not recommended for use in new code. Other options (std::unique_ptr, std::shared_ptr, and std::weak_ptr) all serve different purposes in managing memory, making them valid choices. Therefore, std::auto_ptr is not a valid C++ smart pointer type.


Conclusion: Answer D is correct because std::auto_ptr is an outdated and unsafe smart pointer that should not be used in new C++ code. Options A, B, and C are valid types for smart pointers.


---