In [2]:
from distilabel.pipeline import Pipeline
from distilabel.steps.tasks import TextGeneration
from distilabel.steps import LoadDataFromHub
from distilabel.llms import OpenAILLM

from pydantic import BaseModel, Field
# Pydantic model used as the structured-output schema for the LLM: a JSON object
# carrying a single generated question.
class Question(BaseModel):
    # Required string field (`...` means there is no default value).
    question: str = Field(..., description="The question to be answered")

# Two-step pipeline: pull rows from the SecQA dataset on the Hub, then ask the LLM
# to write one question for each row's "Explanation" text.
with Pipeline(name="generate_questions_from_explanations") as pipeline:
    # Source step: the "test" split of the "secqa_v1" config, emitted two rows per batch.
    load_dataset = LoadDataFromHub(
        name="load_dataset",
        repo_id="zefang-liu/secqa",
        config="secqa_v1",
        split="test",
        batch_size=2,
    )

    # Model reached through an OpenAI-compatible endpoint on localhost; structured
    # output is requested as JSON following the `Question` schema.
    llm = OpenAILLM(
        model="llama-3.2-3B",
        base_url="http://localhost:4000/v1",
        structured_output={"schema": Question, "format": "json"},
    )

    # Generation step: the template injects the "Explanation" column and ends with
    # the opening of a JSON object.
    text_gen_task = TextGeneration(
        name="instruction_gen",
        llm=llm,
        input_batch_size=2,
        columns=["Explanation"],
        template='Text: {{ Explanation }}\n\n{ "question" : "',
        system_prompt="""You are an AI assistant tasked with generating questions based on provided text.
        
        # Output Format:
        {"question": "..."}""",
    )

    # Feed the loader's batches into the generation step.
    load_dataset >> text_gen_task
# Execute the pipeline with caching disabled.
results = pipeline.run(use_cache=False)

# Collect the "generation" value of every row in the default config's "train" split
# and display the resulting list.
generated_questions = [row["generation"] for row in results["default"]["train"]]
generated_questions


Generating train split: 0 examples [00:00, ? examples/s]

['{"question":"What is typically indicated by a sudden increase in SQL queries beyond the normal operational baseline?"}',
 '{"question":"What does plaintext refer to?"}',
 '{"question":"What is Encryption?"}',
 '{"question":"What is Insecure Design?"}',
 None,
 None,
 None,
 None,
 '{"question":"What is the definition of a MitM attack?"}',
 '{"question":"What are the benefits of implementing strong password policies on mobile devices?"}',
 '{"question":"What are Intrusion Detection Systems (IDS) and Intrusion Prevention Systems (IPS) commonly alerted for?"}',
 '{"question":"What are the key benefits of having a well-defined incident response policy?"}',
 '{"question":"What is the primary purpose of secure vault services?"}',
 '{"question":"What exactly does it mean for an intermediate CA\'s key to be signed by a root CA in PKI?"}',
 '{"question":"What is Business Continuity Planning (BCP)?"}',
 '{"question":"What is tailgating in security?"}',
 '{"question":"What is the importance of 

In [None]:
# Display the first row of the default config's "train" split.
results["default"]["train"][0]

In [None]:
# The pipeline output is indexed by config name and then by split, so the generated
# rows live under results["default"]["train"]. Iterating over `results` itself would
# walk the container's keys rather than the rows, and `["generation"]` would fail.
generated_questions = [row["generation"] for row in results["default"]["train"]]

In [1]:
from distilabel.llms import LiteLLM

# LiteLLM client for the local Ollama model, with JSON structured output described
# by the JSON schema derived from `Question`.
llm = LiteLLM(
    model="ollama/llama3.2:latest",
    structured_output={"schema": Question.model_json_schema(), "format": "json"},
)
llm.load()

# Same system prompt text as the one given to the pipeline's generation task.
system_prompt = """You are an AI assistant tasked with generating questions based on provided text.
        
        # Output Format:
        {"question": "..."}"""

# Sample passage used as the user message; it ends with the start of a JSON object.
explaination = """Text: The Great Barrier Reef, located off the coast of Queensland, Australia, is the world's largest coral reef system. Spanning over 2,300 kilometers (1,430 miles), it comprises more than 2,900 individual reefs and 900 islands. This vibrant ecosystem is home to an incredible variety of marine life, including over 1,500 species of fish, 400 species of coral, and iconic creatures such as sea turtles, sharks, and dugongs. The reef plays a crucial role in marine biodiversity and serves as a natural barrier, protecting coastlines from wave erosion. However, it faces significant threats from climate change, including coral bleaching caused by rising sea temperatures, pollution, and overfishing. Efforts are underway globally to preserve this UNESCO World Heritage Site, ensuring its survival for future generations.\n\n{ "question" : """

# A single conversation (system + user message) wrapped in a one-element batch.
output = llm.generate(
    inputs=[
        [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": explaination},
        ]
    ]
)
output


  from distilabel.llms import LiteLLM


# Run Working Example

In [20]:
from litellm import get_supported_openai_params
# Ask LiteLLM which OpenAI-style parameters it supports for this model/provider pair.
get_supported_openai_params(model="llama3.2:latest", custom_llm_provider="ollama")

['max_tokens',
 'stream',
 'top_p',
 'temperature',
 'seed',
 'frequency_penalty',
 'stop',
 'response_format']

In [3]:
import litellm
import os
from litellm import batch_completion
from pydantic import BaseModel, Field
# Same model as the `Question` class declared in the first cell of the notebook;
# it is not referenced by the rest of this cell.
class Question(BaseModel):
    # Required string field (`...` means there is no default value).
    question: str = Field(..., description="The question to be answered")

# Keep LiteLLM's verbose output switched off.
litellm.set_verbose = False

# Send two single-message user conversations in one batch to the local
# OpenAI-compatible endpoint, requesting JSON-object responses.
responses = batch_completion(
    model="openai/llama3.2:latest",
    api_base="http://localhost:11434/v1",
    messages=[
        [{"role": "user", "content": prompt}]
        for prompt in (
            'good morning? Please respond in JSON format. {',
            "what's the time? Please respond in JSON format.",
        )
    ],
    response_format={"type": "json_object"},
)

# Display only the message content of the first choice of each response.
[reply.to_dict()["choices"][0]["message"]["content"] for reply in responses]

['{"greeting": "good morning", "confirmation": true}', '{ "time": null }']

['{"response": "Good morning!"}', '{"time": "currently unavailable"}']

In [13]:
# Bind the first element of `responses` to `response`.
# NOTE(review): an assignment displays nothing, so the output saved under this cell
# presumably comes from an earlier version of the cell — confirm and re-run.
response = responses[0]


'{"response": "Good morning!"}'