# Load Packages

In [1]:
import os
from dotenv import load_dotenv

# Load environment variables from a local .env file so that later cells can read them
# with os.getenv (the client setup below reads NVIDIA_API_KEY this way).
# Left as the last bare expression on purpose: the cell echoes the call's return value.
load_dotenv()

True

In [2]:
from nemo_microservices.data_designer.essentials import (
    CategorySamplerParams,
    DataDesignerConfigBuilder,
    LLMTextColumnConfig,
    NeMoDataDesignerClient,
    PersonSamplerParams,
    SamplerColumnConfig,
    SamplerType,
    SubcategorySamplerParams,
    UniformSamplerParams,
)

# Read the API key up front and fail fast when it is missing. os.getenv() returns
# None for an unset variable, which would otherwise be formatted into the header
# as the literal string "Bearer None" and only surface later as an opaque
# authentication failure from the hosted service.
nvidia_api_key = os.getenv("NVIDIA_API_KEY")
if not nvidia_api_key:
    raise RuntimeError(
        "NVIDIA_API_KEY is not set. Add it to your .env file or export it in the "
        "environment before running this notebook."
    )

# Client for the hosted Data Designer endpoint; every request carries the key
# as a bearer token.
data_designer_client = NeMoDataDesignerClient(
    base_url="https://ai.api.nvidia.com/v1/nemo/dd",
    default_headers={"Authorization": f"Bearer {nvidia_api_key}"},
)

# Model aliases available by default in this hosted Data Designer:
#   nemotron-nano-v2, nemotron-super, mistral-small,
#   gpt-oss-20b, gpt-oss-120b, llama-4-scout-17b
# The alias chosen here is passed to every LLM-generated column below.
model_alias = "nemotron-nano-v2"

# Accumulates the column definitions added in the following cells.
config_builder = DataDesignerConfigBuilder()


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
###
# This free trial includes:
# - nemotron-nano-v2     → nvidia/nvidia-nemotron-nano-9b-v2
# - nemotron-super       → nvidia/llama-3.3-nemotron-super-49b-v1.5
# - mistral-small        → mistralai/mistral-small-24b-instruct
# - gpt-oss-20b          → openai/gpt-oss-20b
# - gpt-oss-120b         → openai/gpt-oss-120b
# - llama-4-scout-17b    → meta/llama-4-scout-17b-16e-instruct
###

# Top-level product category. The subcategory column below is keyed on these
# exact strings, so keep the two lists in sync when editing.
config_builder.add_column(
    SamplerColumnConfig(
        name="product_category",
        sampler_type=SamplerType.CATEGORY,
        params=CategorySamplerParams(
            values=["Electronics", "Clothing", "Home & Kitchen", "Books", "Home Office"],
        ),
    )
)

# Subcategory conditioned on the `product_category` column: each key of `values`
# must match one of the category strings exactly, and maps to the subcategories
# that may be sampled for that category.
config_builder.add_column(
    SamplerColumnConfig(
        name="product_subcategory",
        sampler_type=SamplerType.SUBCATEGORY,
        params=SubcategorySamplerParams(
            category="product_category",
            values={
                "Electronics": ["Smartphones", "Laptops", "Headphones", "Cameras", "Accessories"],
                "Clothing": ["Men's Clothing", "Women's Clothing", "Winter Coats", "Activewear", "Accessories"],
                "Home & Kitchen": ["Appliances", "Cookware", "Furniture", "Decor", "Organization"],
                "Books": ["Fiction", "Non-Fiction", "Self-Help", "Textbooks", "Classics"],
                "Home Office": ["Desks", "Chairs", "Storage", "Office Supplies", "Lighting"],
            },
        ),
    )
)

# Age bracket the *product* is aimed at. This is its own sampler column and is not
# tied to the age of the sampled `customer`, so the two can disagree in a record.
# NOTE(review): adjacent labels share a boundary (25, 35, 50 and 65 each appear in
# two brackets) — confirm whether non-overlapping labels are wanted.
config_builder.add_column(
    SamplerColumnConfig(
        name="target_age_range",
        sampler_type=SamplerType.CATEGORY,
        params=CategorySamplerParams(
            values=[
                "18-25",
                "25-35",
                "35-50",
                "50-65",
                "65+",
            ],
        ),
    )
)

# Synthetic person record for the reviewer. The review prompt later reads
# `first_name`, `city`, `state` and `age` from this column.
config_builder.add_column(
    SamplerColumnConfig(
        name="customer",
        sampler_type=SamplerType.PERSON,
        params=PersonSamplerParams(
            age_range=[18, 70],
        ),
    )
)

# Star rating for the review: a uniform draw over [1, 5] converted to an integer.
# NOTE(review): depending on whether the int conversion rounds or truncates, the
# endpoints (1 and 5) may be sampled less often than 2-4 — confirm against the
# sampler documentation if an even rating distribution matters.
config_builder.add_column(
    SamplerColumnConfig(
        name="number_of_stars",
        sampler_type=SamplerType.UNIFORM,
        params=UniformSamplerParams(
            low=1,
            high=5,
        ),
        convert_to="int",
    )
)

# Writing style the review prompt asks the model to follow.
config_builder.add_column(
    SamplerColumnConfig(
        name="review_style",
        sampler_type=SamplerType.CATEGORY,
        params=CategorySamplerParams(
            values=[
                "rambling",
                "brief",
                "detailed",
                "structured with bullet points",
            ],
            # One weight per entry of `values`, in the same order
            # (presumably relative sampling weights — confirm against the sampler docs).
            weights=[1, 2, 2, 1],
        ),
    )
)

In [4]:
# LLM-generated product name. The prompt is a template: the {{ ... }} placeholders
# name the sampler columns defined earlier (category, subcategory, target age range).
config_builder.add_column(
    LLMTextColumnConfig(
        name="product_name",
        model_alias=model_alias,
        # The system prompt is optional; it helps steer the model's behavior.
        # Avoid putting output-format instructions here — Data Designer derives
        # those from the column type.
        system_prompt=(
            "You are a helpful assistant that generates product names. You respond with only the product name, "
            "no other text. You do NOT add quotes around the product name."
        ),
        prompt=(
            "Come up with a creative product name for a product in the '{{ product_category }}' category, focusing "
            "on products related to '{{ product_subcategory }}'. The target age range of the ideal customer is "
            "{{ target_age_range }} years old. Respond with only the product name, no other text."
        ),
    )
)

# LLM-generated review text. The template references the sampled `customer` fields,
# the star rating and review style, and the `product_name` column produced by the
# previous LLM column.
config_builder.add_column(
    LLMTextColumnConfig(
        name="customer_review",
        model_alias=model_alias,
        prompt=(
            "You are a customer named {{ customer.first_name }} from {{ customer.city }}, {{ customer.state }}. "
            "You are {{ customer.age }} years old and recently purchased a product called {{ product_name }}. "
            "Write a review of this product, which you gave a rating of {{ number_of_stars }} stars. "
            "The style of the review should be '{{ review_style }}'."
        ),
    )
)

In [5]:
# Request a 10-record preview built from the columns configured above. The result
# object is reused by the following cells to inspect the generated data.
preview = data_designer_client.preview(
    config_builder,
    num_records=10,
)

[13:50:26] [INFO] ✅ Validation passed
[13:50:26] [INFO] 🚀 Starting preview generation
[13:50:27] [INFO] ⛓️ Sorting column configs into a Directed Acyclic Graph
[13:50:27] [INFO] 🩺 Running health checks for models...
[13:50:29] [INFO]   |-- 👀 Checking 'nvidia/nvidia-nemotron-nano-9b-v2'...
[13:50:29] [INFO]   |-- ✅ Passed!
[13:50:31] [INFO]   |-- 👀 Checking 'nvidia/llama-3.3-nemotron-super-49b-v1.5'...
[13:50:31] [INFO]   |-- ✅ Passed!
[13:50:32] [INFO]   |-- 👀 Checking 'mistralai/mistral-small-24b-instruct'...
[13:50:32] [INFO]   |-- ✅ Passed!
[13:50:33] [INFO]   |-- 👀 Checking 'openai/gpt-oss-20b'...
[13:50:33] [INFO]   |-- ✅ Passed!
[13:50:34] [INFO]   |-- 👀 Checking 'openai/gpt-oss-120b'...
[13:50:34] [INFO]   |-- ✅ Passed!
[13:50:35] [INFO]   |-- 👀 Checking 'meta/llama-4-scout-17b-16e-instruct'...
[13:50:35] [INFO]   |-- ✅ Passed!
[13:50:35] [INFO] ⏳ Processing batch 1 of 1
[13:50:35] [INFO] 🎲 Preparing samplers to generate 10 records across 6 columns
[13:50:35] [INFO] 🎲 👨‍💻 Initia

In [6]:
# Presumably renders a single generated record from the preview for a quick visual
# check of the prompts and outputs — confirm against the client documentation.
preview.display_sample_record()

In [7]:
# Bare last expression so the notebook displays the preview records as a table.
# Besides the configured columns, the output also carries a
# `<column>__reasoning_trace` column for each of the two LLM-generated columns.
preview.dataset

Unnamed: 0,product_category,product_subcategory,target_age_range,customer,number_of_stars,review_style,product_name,product_name__reasoning_trace,customer_review,customer_review__reasoning_trace
0,Home Office,Desks,25-35,"{'age': 52, 'bachelors_field': 'stem', 'birth_...",2,structured with bullet points,DeskNexus,"Okay, the user wants a creative product name f...",**Review Title: Disappointing Experience with ...,"Okay, I need to write a structured review with..."
1,Books,Textbooks,18-25,"{'age': 59, 'bachelors_field': 'no degree', 'b...",1,brief,PulseText,"Okay, the user wants a creative product name f...",**1 star.** PulseText is a waste of time and m...,"Okay, let's tackle this. The user wants a 1-st..."
2,Home & Kitchen,Decor,50-65,"{'age': 18, 'bachelors_field': 'no degree', 'b...",1,brief,Heritage Haven,"Okay, the user wants a product name for Home &...",**1 star.** Heritage Haven didn’t meet expecta...,"Okay, let's see. The user wants a 1-star revie..."
3,Clothing,Women's Clothing,18-25,"{'age': 29, 'bachelors_field': 'stem', 'birth_...",2,brief,ChromaVibe,"Okay, the user wants a creative product name f...",**ChromaVibe disappointed. Expected vibrant co...,"Okay, Joseph from Salt Lake City bought Chroma..."
4,Electronics,Accessories,25-35,"{'age': 69, 'bachelors_field': 'no degree', 'b...",3,detailed,NexaVerve,"Okay, the user wants a creative product name f...",**Review Title: NexaVerve – A Mixed Experience...,"Okay, I need to write a detailed 3-star review..."
5,Electronics,Headphones,65+,"{'age': 36, 'bachelors_field': 'arts_humanitie...",2,structured with bullet points,GoldenEars,"Okay, the user wants a creative product name f...",**Review Title: GoldenEars – A Disappointing E...,"Okay, Nettie from Murfreesboro, TN, 36 years o..."
6,Home Office,Chairs,65+,"{'age': 25, 'bachelors_field': 'business', 'bi...",3,detailed,GoldenGlide,"Okay, the user wants a creative product name f...",**Review Title: GoldenGlide – A Mixed Experien...,"Okay, Dean needs to write a 3-star review for ..."
7,Electronics,Laptops,25-35,"{'age': 40, 'bachelors_field': 'education', 'b...",5,structured with bullet points,NexaLap,"Okay, the user wants a creative product name f...",**5-Star Review of NexaLap by Kathryn from Can...,"Okay, I need to write a 5-star review for the ..."
8,Electronics,Laptops,25-35,"{'age': 36, 'bachelors_field': 'no degree', 'b...",4,brief,NexaLuma Pulse,"Okay, the user wants a creative product name f...",**4 stars.** NexaLuma Pulse is reliable and ea...,"Okay, let's tackle this query. The user wants ..."
9,Books,Classics,65+,"{'age': 64, 'bachelors_field': 'education', 'b...",3,brief,Timeless Tales,"Okay, the user wants a creative product name f...",**Timeless Tales: 3 stars.** The stories were ...,"Okay, let's tackle this review. The user is Le..."
