In [1]:
import ollama
import pandas as pd
import json

In [73]:
# single model response
def model_response(model, user_input, field_names, num_rows):
    """Ask one Ollama chat model for synthetic test rows and return them.

    The request consists of a fixed system prompt plus a user message built
    from ``user_input``, ``num_rows`` and ``field_names``. The raw reply text
    is printed before it is parsed.

    Args:
        model: Model name forwarded to ``ollama.chat``.
        user_input: Free-text description of the data wanted.
        field_names: Field names interpolated as-is into the user message.
        num_rows: Number of rows requested in the user message.

    Returns:
        The list stored under the "data" key of the model's JSON reply.

    Raises:
        ValueError: If the reply is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError``), or if it is not a JSON object whose "data"
            field is a list.
    """
    system_prompt = """
You are a smart data science assistant who is responsible for generating test data given a particular format. Generate varied data. Don't give generic data that can be found anywhere. Don't give names of real companies or brands.
The user may ask for negative testing, limit testing, or some particular type of data. Please appropriately respond with good varied data.
Always respond in a nested json string format with the data as part of the "data" field. 
Return the requested number of data points only.
For example if given a format as ['Name', 'Class', 'Grade', 'Percentage(per 100)'], field_names=['Name', 'Class', 'Grade', 'Percentage'] for 2 rows, generate a response such as:
"{"data":[{"Name":"Rocky", "Class":"2A", "Grade":"B", "Percentage":"88"}, {"Name":"Barry", "Class":"3B", "Grade":"A", "Percentage":"92"}]}"
You may include extra information about the generated data in a separate field like "comments".
DO NOT CREATE NEW FIELD NAMES. USE ONLY THE FIELD NAMES GIVEN BY THE USER. DO NOT DISPLAY UNITS IN FIELD NAMES
"""
    user_message = f'{user_input}\nGenerate {num_rows} rows of data.\nFields: {field_names}'
    messages = [
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': user_message},
    ]
    response = ollama.chat(model=model, messages=messages, format='json')
    content = response['message']['content']
    # Show the raw reply before parsing so a malformed answer can still be inspected.
    print(content)

    payload = json.loads(content)
    # The system prompt asks for {"data": [...]}, but the model is free to ignore it;
    # fail with a clear message instead of an opaque KeyError/TypeError.
    if not isinstance(payload, dict) or 'data' not in payload:
        raise ValueError(f'Response from {model!r} has no top-level "data" field')
    generated_rows = payload['data']
    if not isinstance(generated_rows, list):
        raise ValueError(
            f'"data" field from {model!r} must be a list of rows, '
            f'got {type(generated_rows).__name__}'
        )
    return generated_rows


In [74]:
# example use case
user_input = """
Generate data of the format [company_name, page_thickness(mm), copies_sold, price(per 100k pages)] for a database consisting of printer page companies.
"""
# One string per column, without units: the system prompt tells the model to use
# only these names as row keys.
field_names = ['company_name', 'page_thickness', 'copies_sold', 'price']
num_rows = 10
llama_response = model_response('llama3.1:8b', user_input, field_names, num_rows)

{
    "data": [
        {
            "company_name": "ElitePrint",
            "page_thickness": "0.5",
            "copies_sold": "5421",
            "price": "12"
        },
        {
            "company_name": "PageMaster",
            "page_thickness": "0.7",
            "copies_sold": "8192",
            "price": "8"
        },
        {
            "company_name": "ThickPages Inc.",
            "page_thickness": "1.2",
            "copies_sold": "1279",
            "price": "20"
        },
        {
            "company_name": "PrintPro",
            "page_thickness": "0.3",
            "copies_sold": "9234",
            "price": "15"
        },
        {
            "company_name": "PaperWorks",
            "page_thickness": "1.5",
            "copies_sold": "1500",
            "price": "25"
        },
        {
            "company_name": "ThermalPress",
            "page_thickness": "0.9",
            "copies_sold": "6789",
            "price": "18"
        },
        {
    

In [75]:
# Send the identical request to the two other models so their rows can be pooled with llama's.
qwen_response = model_response(
    model='qwen2.5:7b', user_input=user_input, field_names=field_names, num_rows=num_rows
)
gemma_response = model_response(
    model='gemma3:4b', user_input=user_input, field_names=field_names, num_rows=num_rows
)

{
    "data": [
        {"company_name": "PrintMaster", "page_thickness": "0.05", "copies_sold": "200000", "price": "15"},
        {"company_name": "PagePro", "page_thickness": "0.07", "copies_sold": "180000", "price": "12"},
        {"company_name": "DocuPrint", "page_thickness": "0.06", "copies_sold": "300000", "price": "14"},
        {"company_name": "CopyFlex", "page_thickness": "0.08", "copies_sold": "250000", "price": "11"},
        {"company_name": "PrintQuick", "page_thickness": "0.09", "copies_sold": "320000", "price": "16"},
        {"company_name": "PagePilot", "page_thickness": "0.10", "copies_sold": "400000", "price": "20"},
        {"company_name": "DocuFlex", "page_thickness": "0.075", "copies_sold": "380000", "price": "18"},
        {"company_name": "PrintFlow", "page_thickness": "0.065", "copies_sold": "280000", "price": "14"},
        {"company_name": "PagePrime", "page_thickness": "0.055", "copies_sold": "350000", "price": "13"},
        {"company_name": "PrintPrime"

In [78]:
# Pool the rows from all three models into one list, keeping model order
# (llama, then qwen, then gemma), and echo each row for a quick visual check.
generated_data = llama_response + qwen_response + gemma_response
for row in generated_data:
    print(row)

{'company_name': 'ElitePrint', 'page_thickness': '0.5', 'copies_sold': '5421', 'price': '12'}
{'company_name': 'PageMaster', 'page_thickness': '0.7', 'copies_sold': '8192', 'price': '8'}
{'company_name': 'ThickPages Inc.', 'page_thickness': '1.2', 'copies_sold': '1279', 'price': '20'}
{'company_name': 'PrintPro', 'page_thickness': '0.3', 'copies_sold': '9234', 'price': '15'}
{'company_name': 'PaperWorks', 'page_thickness': '1.5', 'copies_sold': '1500', 'price': '25'}
{'company_name': 'ThermalPress', 'page_thickness': '0.9', 'copies_sold': '6789', 'price': '18'}
{'company_name': 'PageWorks Co.', 'page_thickness': '1.8', 'copies_sold': '2345', 'price': '22'}
{'company_name': 'CopyCat Printing', 'page_thickness': '0.2', 'copies_sold': '9999', 'price': '11'}
{'company_name': 'QuickPrint', 'page_thickness': '1.1', 'copies_sold': '1111', 'price': '21'}
{'company_name': 'PaperPioneers', 'page_thickness': '0.6', 'copies_sold': '7890', 'price': '19'}
{'company_name': 'PrintMaster', 'page_thickn

In [79]:
# Tabulate the pooled records: each dict becomes one row, its keys become the columns.
generated_df = pd.DataFrame.from_records(data=generated_data)
print(generated_df)

               company_name page_thickness copies_sold price
0                ElitePrint            0.5        5421    12
1                PageMaster            0.7        8192     8
2           ThickPages Inc.            1.2        1279    20
3                  PrintPro            0.3        9234    15
4                PaperWorks            1.5        1500    25
5              ThermalPress            0.9        6789    18
6             PageWorks Co.            1.8        2345    22
7          CopyCat Printing            0.2        9999    11
8                QuickPrint            1.1        1111    21
9             PaperPioneers            0.6        7890    19
10              PrintMaster           0.05      200000    15
11                  PagePro           0.07      180000    12
12                DocuPrint           0.06      300000    14
13                 CopyFlex           0.08      250000    11
14               PrintQuick           0.09      320000    16
15                PagePi