In [1]:
import os
import os
import time
import json
import yaml

import os
import time
import json
import pandas as pd
import warnings

# Silence every Python warning for the remainder of the session
warnings.simplefilter('ignore')

# NOTE: Wandb, Unsloth and the HuggingFace libraries are not imported in this
# cell; it only configures warnings and environment variables.

# HuggingFace access token taken from the environment (None when HF_TOKEN is unset)
HF_TOKEN = os.getenv("HF_TOKEN")

# Turn off tokenizers parallelism to avoid the HuggingFace tokenizers warning
os.environ.update(TOKENIZERS_PARALLELISM="false")

# Be careful when interacting with the code below: it relates to repository settings.

In [2]:
# !git clone https://github.com/tuandunghcmut/Small-Qwen-Coding-Multiple-Choice.git
# import sys
# sys.path.append("Small-Qwen-Coding-Multiple-Choice")

# %cd Small-Qwen-Coding-Multiple-Choice

# Import minimal things to run the code

In [3]:
try:
    # Probe whether the project package `src` is already importable
    import src
except ImportError:
    # It is not: add the parent directory to the module search path so the
    # `from src...` imports that follow can be resolved from there
    import sys

    sys.path.extend(["../"])

from src.model.qwen_handler import QwenModelHandler, ModelSource
from src.prompt_processors.prompt_creator import PromptCreator

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch
import os
import unsloth
from src.testing.tester import MultipleChoiceTester

# Load the fine-tuned model from the HuggingFace Hub and wrap it in a tester.
# Read the HuggingFace token from the environment (None when unset).
# NOTE(review): hf_token is not referenced again in this cell.
hf_token = os.environ.get("HF_TOKEN")

# Model ID on HuggingFace Hub
hub_model_id = "tuandunghcmut/Qwen25_Coder_MultipleChoice_v4"
print(f"Loading model from HuggingFace Hub: {hub_model_id}")

try:


    # Build the project's model handler for the Hub checkpoint via the Unsloth source
    lastest_model_handler_hub = QwenModelHandler(
        model_name=hub_model_id,
        max_seq_length=2048,
        # quantization="4bit",  (left disabled: no quantization argument is passed)
        model_source=ModelSource.UNSLOTH,
    )

    # Imported here, after the handler has been created
    from unsloth.models import FastLanguageModel

    # Prepare the loaded model for inference through Unsloth
    FastLanguageModel.for_inference(lastest_model_handler_hub.model)
    # Prompt format used by the tester — presumably the YAML-structured
    # reasoning prompt; confirm against PromptCreator
    prompt_creator = PromptCreator(PromptCreator.YAML_REASONING)
    # Create a tester with the loaded model; later cells call it as `latest_tester_hub`
    latest_tester_hub = MultipleChoiceTester(
        lastest_model_handler_hub, prompt_creator=prompt_creator
    )

    print("Successfully loaded model from HuggingFace Hub!")

except Exception as e:
    # Best-effort: report the failure instead of stopping the notebook.
    # NOTE(review): nothing in this cell loads a local model, so if this branch
    # runs `latest_tester_hub` is not defined here and cells that use it will fail.
    print(f"Error loading model from HuggingFace Hub: {e}")
    print("Continuing with locally trained model...")

Loading model from HuggingFace Hub: tuandunghcmut/Qwen25_Coder_MultipleChoice_v4
2025-04-04 08:07:49 - src.model.qwen_handler - INFO - Loading tuandunghcmut/Qwen25_Coder_MultipleChoice_v4 from unsloth, max_seq_length=2048


INFO:src.model.qwen_handler:Loading tuandunghcmut/Qwen25_Coder_MultipleChoice_v4 from unsloth, max_seq_length=2048


2025-04-04 08:07:49 - src.model.qwen_handler - INFO - Flash Attention 2 is available (package flash-attn detected)


INFO:src.model.qwen_handler:Flash Attention 2 is available (package flash-attn detected)


2025-04-04 08:07:49 - src.model.qwen_handler - INFO - Flash Attention 2 version: 2.7.4.post1


INFO:src.model.qwen_handler:Flash Attention 2 version: 2.7.4.post1


2025-04-04 08:07:49 - src.model.qwen_handler - INFO - xFormers is available (version: 0.0.29.post3)


INFO:src.model.qwen_handler:xFormers is available (version: 0.0.29.post3)


2025-04-04 08:07:49 - src.model.qwen_handler - INFO - CUDA is available (version: 12.4)


INFO:src.model.qwen_handler:CUDA is available (version: 12.4)


2025-04-04 08:07:49 - src.model.qwen_handler - INFO - Using attention implementation: default


INFO:src.model.qwen_handler:Using attention implementation: default


2025-04-04 08:07:49 - src.model.qwen_handler - INFO - Setting max memory: {0: '27620MiB'}


INFO:src.model.qwen_handler:Setting max memory: {0: '27620MiB'}


==((====))==  Unsloth 2025.3.19: Fast Qwen2 patching. Transformers: 4.50.3.
   \\   /|    Tesla V100-SXM2-32GB. Num GPUs = 1. Max memory: 31.733 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.0. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2025.3.19 patched 28 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


2025-04-04 08:08:00 - src.model.qwen_handler - INFO - Model loaded successfully: tuandunghcmut/Qwen25_Coder_MultipleChoice_v4


INFO:src.model.qwen_handler:Model loaded successfully: tuandunghcmut/Qwen25_Coder_MultipleChoice_v4


2025-04-04 08:08:00 - src.model.qwen_handler - INFO - Model type: qwen2


INFO:src.model.qwen_handler:Model type: qwen2


2025-04-04 08:08:00 - src.model.qwen_handler - INFO - hidden_size: 1536


INFO:src.model.qwen_handler:hidden_size: 1536


2025-04-04 08:08:00 - src.model.qwen_handler - INFO - intermediate_size: 8960


INFO:src.model.qwen_handler:intermediate_size: 8960


2025-04-04 08:08:00 - src.model.qwen_handler - INFO - num_hidden_layers: 28


INFO:src.model.qwen_handler:num_hidden_layers: 28


2025-04-04 08:08:00 - src.model.qwen_handler - INFO - num_attention_heads: 12


INFO:src.model.qwen_handler:num_attention_heads: 12


2025-04-04 08:08:00 - src.model.qwen_handler - INFO - torch_dtype: float16


INFO:src.model.qwen_handler:torch_dtype: float16


Successfully loaded model from HuggingFace Hub!


# Inference Streaming

In [6]:
import yaml
import time
from IPython.display import clear_output
# Sample multiple-choice question about Python, with an optional ground-truth
# answer and a short chain-of-thought text (one section or option per line).
python_example = dict(
    question="Which of the following correctly defines a list comprehension in Python?",
    choices=[
        "[x**2 for x in range(10)]",
        "for(x in range(10)) { return x**2; }",
        "map(lambda x: x**2, range(10))",
        "[for x in range(10): x**2]",
    ],
    # Optional ground truth
    answer="A",
    # The lines below are joined with "\n"; there is no trailing newline
    chain_of_thought="\n".join([
        "understanding: The question is asking which option correctly represents a list comprehension in Python. List comprehensions are a concise way to create lists using expressions and iterators.",
        "analysis: A. This option uses a list comprehension syntax, which is correct. It iterates over the range of numbers from 0 to 9, squares each number, and collects the results into a list.",
        "    B. This option uses a for loop with a lambda function, which is syntactically similar to a list comprehension but not the same. It does not use list comprehension syntax.",
        "    C. This option uses the map function with a lambda, which is functionally similar to a list comprehension but not the same. It does not use list comprehension syntax.",
        "    D. This option uses a for loop with a colon, which is syntactically similar to a list comprehension but not the same. It does not use list comprehension syntax.",
        "reasoning: List comprehensions in Python are defined using the syntax [expression for item in iterable if condition].",
    ]),
)

def stream_infer_example_in_notebook(example, temperature=0.0001):
    """Stream the model's answer for one example, redrawing the cell output as it grows.

    Args:
        example: The question to answer (e.g. ``python_example``); passed
            unchanged to ``latest_tester_hub.infer_example``.
        temperature: Sampling temperature forwarded to ``infer_example``.
            (It used to be ignored in favour of a hard-coded 0.0001.)

    Returns:
        None. The accumulated response is printed to the cell output.

    Errors raised while iterating the stream are caught and printed rather
    than propagated, so the notebook keeps running.
    """
    # Forward the caller's temperature instead of a hard-coded value
    result = latest_tester_hub.infer_example(
        example, temperature=temperature, stream=True
    )

    complete_response = ""
    try:
        for chunk in result:
            # Chunks are expected to look like (prompt, formatted_response, ...);
            # anything else is used as plain text.
            if isinstance(chunk, (tuple, list)) and len(chunk) >= 2:
                formatted_response = chunk[1]
            else:
                formatted_response = str(chunk)

            # Accumulate the response by appending each new chunk
            complete_response += formatted_response

            # Replace the previous output with the accumulated text; wait=True
            # delays the clear until the new text is printed
            clear_output(wait=True)
            print(complete_response)
    except Exception as e:
        print(f"Error during streaming: {e}")
    else:
        # Only announce completion when the stream finished without an error
        print("----- Streaming complete -----")

# Run the streaming demo on the sample question, using the default temperature
stream_infer_example_in_notebook(python_example)


understanding: |
  The question is asking which option correctly represents a list comprehension in Python. List comprehensions are a concise way to create lists using expressions and iterators.
analysis: |
  A. This option uses a list comprehension syntax, which is correct. It iterates over the range of numbers from 0 to 9, squares each number, and collects the results into a list.
  B. This option uses a for loop with a lambda function, which is syntactically similar to a list comprehension but not the same. It does not use list comprehension syntax.
  C. This option uses the map function with a lambda, which is functionally similar to a list comprehension but not the same. It does not use list comprehension syntax.
  D. This option uses a for loop with a colon, which is syntactically similar to a list comprehension but not the same. It does not use list comprehension syntax.
reasoning: |
  List comprehensions in Python are defined using the syntax `[expression for item in iterable i

# Inference without streaming, to get the full completion



In [35]:
# Run the same sample question with streaming disabled and keep the returned
# value in `result` for the cells below
result = latest_tester_hub.infer_example(
    python_example, temperature=0.0001, stream=False
)

In [37]:
# Display the value returned by the non-streaming inference call
result

{'question': 'Which of the following correctly defines a list comprehension in Python?',
 'choices': '[x**2 for x in range(10)]\nfor(x in range(10)) { return x**2; }\nmap(lambda x: x**2, range(10))\n[for x in range(10): x**2]',
 'ground_truth': 'A',
 'predicted_answer': 'A',
 'reasoning': 'Understanding:\nThe question is asking which option correctly represents a list comprehension in Python. List comprehensions are a concise way to create lists using expressions and iterators.\n\n\nAnalysis:\nA. This option uses a list comprehension syntax, which is correct. It iterates over the range of numbers from 0 to 9, squares each number, and collects the results into a list.\nB. This option uses a for loop with a lambda function, which is syntactically similar to a list comprehension but not the same. It does not use list comprehension syntax.\nC. This option uses the map function with a lambda, which is functionally similar to a list comprehension but not the same. It does not use list compre

# Load the YAML completion

In [38]:
import yaml
from pprint import pprint
import json
from IPython.display import display, Markdown

try:
    # Parse the model's response text with the safe YAML loader
    yaml_completion = yaml.safe_load(result['response_text'])

    # Show the unmodified response inside a YAML code fence
    display(Markdown("## Full YAML Completion"))
    display(Markdown("\n".join(["```yaml", result['response_text'], "```"])))

    # Show the parsed structure re-serialised as indented JSON
    display(Markdown("## YAML Completion as JSON"))
    json_formatted = json.dumps(yaml_completion, indent=2)
    display(Markdown("\n".join(["```json", json_formatted, "```"])))

except yaml.YAMLError as parse_error:
    # Parsing failed: report the error, then fall back to the raw response text
    display(Markdown("**Error parsing YAML:** " + str(parse_error)))
    display(Markdown("### Raw Response Text:"))
    display(Markdown("\n".join(["```", result['response_text'], "```"])))


## Full YAML Completion

```yaml
understanding: |
  The question is asking which option correctly represents a list comprehension in Python. List comprehensions are a concise way to create lists using expressions and iterators.
analysis: |
  A. This option uses a list comprehension syntax, which is correct. It iterates over the range of numbers from 0 to 9, squares each number, and collects the results into a list.
  B. This option uses a for loop with a lambda function, which is syntactically similar to a list comprehension but not the same. It does not use list comprehension syntax.
  C. This option uses the map function with a lambda, which is functionally similar to a list comprehension but not the same. It does not use list comprehension syntax.
  D. This option uses a for loop with a colon, which is syntactically similar to a list comprehension but not the same. It does not use list comprehension syntax.
reasoning: |
  List comprehensions in Python are defined using the syntax `[expression for item in iterable if condition]`. Option A correctly uses this syntax, iterating over a range and squaring each number, resulting in a list of squares. Options B, C, and D do not use this syntax, making them incorrect.
conclusion: |
  Option A is correct because it uses the list comprehension syntax, which is the standard way to create lists in Python using expressions and iterators.
answer: A
```

## YAML Completion as JSON

```json
{
  "understanding": "The question is asking which option correctly represents a list comprehension in Python. List comprehensions are a concise way to create lists using expressions and iterators.\n",
  "analysis": "A. This option uses a list comprehension syntax, which is correct. It iterates over the range of numbers from 0 to 9, squares each number, and collects the results into a list.\nB. This option uses a for loop with a lambda function, which is syntactically similar to a list comprehension but not the same. It does not use list comprehension syntax.\nC. This option uses the map function with a lambda, which is functionally similar to a list comprehension but not the same. It does not use list comprehension syntax.\nD. This option uses a for loop with a colon, which is syntactically similar to a list comprehension but not the same. It does not use list comprehension syntax.\n",
  "reasoning": "List comprehensions in Python are defined using the syntax `[expression for item in iterable if condition]`. Option A correctly uses this syntax, iterating over a range and squaring each number, resulting in a list of squares. Options B, C, and D do not use this syntax, making them incorrect.\n",
  "conclusion": "Option A is correct because it uses the list comprehension syntax, which is the standard way to create lists in Python using expressions and iterators.\n",
  "answer": "A"
}
```