In [62]:
from LLMObject import LLMFactory

# Initialize the factory and get the LLM object
factory = LLMFactory('config.yaml')

In [63]:
llm = factory.get_llm()

In [64]:
from langchain_core.messages import AIMessage

messages = [
    (
        "system",
        "You are a helpful assistant that translates English to French. Translate the user sentence.",
    ),
    ("human", "I love programming."),
]
ai_msg = llm.invoke(messages)
ai_msg

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


AIMessage(content=" J'adore programmer.", response_metadata={'model': 'phi3:3.8b', 'created_at': '2024-09-12T17:09:54.407782879Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 2708212760, 'load_duration': 2504302489, 'prompt_eval_count': 27, 'prompt_eval_duration': 26031000, 'eval_count': 7, 'eval_duration': 125489000}, id='run-df4a070a-1c3c-43ca-9da3-254d8a0a39a6-0', usage_metadata={'input_tokens': 27, 'output_tokens': 7, 'total_tokens': 34})

In [65]:
import json
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Function to load tables data from a JSON file
def load_tables_from_json(file_path):
    with open(file_path, 'r') as file:
        tables = json.load(file)
    return tables

# Function to extract column details for a given dataset name
def get_column_details(tables, dataset_name):
    """
    Extract column details for a given dataset name from the tables JSON data.

    Parameters:
    tables (list): A list of dictionaries representing datasets and their details.
    dataset_name (str): The name of the dataset to extract column details for.

    Returns:
    dict: A dictionary containing the details of the specified dataset.
    """
    for table in tables:
        if table['dataset'] == dataset_name:
            return table
    return None

# Function to generate a detailed prompt for LLM based on column details
def generate_llm_prompt(table_details):
    """
    Generate a detailed prompt for the LLM to generate synthetic data based on the table details.

    Parameters:
    table_details (dict): The details of the table for which to generate the LLM prompt.

    Returns:
    str: A detailed prompt for the LLM.
    """
    prompt = ""
    description = table_details["description"]
    label = table_details["label"]

    for property in table_details['properties']:
        prompt += f"- attribute:{property['name']} attribute name:{property['label']} Type:{property['type']} Description:{property['description']}\n"

    # prompt += "\nPlease generate a synthetic dataset in json format which would be array of key value pair, where key is the attribute and value is value, ensuring that the values conform to the descriptions and constraints provided for each column. The data should mimic the distribution, range, and format as closely as possible to real-world data. Ensure that referential integrity is maintained where applicable, and include a variety of values within the allowed constraints to reflect realistic data variability.\n"

    return prompt, description, label

# Load the tables data from the JSON file
file_path = 'data/cols.json'  # Replace with the path to your JSON file
tables = load_tables_from_json(file_path)

# Example usage: Generate a prompt for the 'customers' dataset
dataset_name = 'payments'  # Replace with the desired dataset name
table_details = get_column_details(tables, dataset_name)

col_prompt, description, label = generate_llm_prompt(table_details)
# print(col_prompt)  # This will display the detailed prompt for the LLM

# LangChain integration
# llm = OpenAI(model="text-davinci-003")  # Replace with your LLM model configuration
items_count = 5
format = "json"

ai_message = '''<s>[INST] You are an AI model tasked with generating synthetic data for the Dataset: "{dataset_name}" dataset used in a travel agency. Below are the details of the columns for this dataset:
The Dataset description: {dataset_description}
{col_prompt} [/INST]</s>
[INST]Please generate a synthetic dataset in json format which would be array of key value pair, where key is the attribute and value is value, ensuring that the values conform to the descriptions and constraints provided for each column. The data should mimic the distribution, range, and format as closely as possible to real-world data. Ensure that referential integrity is maintained where applicable, and include a variety of values within the allowed constraints to reflect realistic data variability. Generate {items_count} items only and generate response in {format} only.[/INST]'''

prompt_template = PromptTemplate.from_template(ai_message)
ll = prompt_template.format(dataset_name=dataset_name, dataset_description=description, col_prompt=col_prompt, items_count=items_count, format=format)

# ll = prompt_template.format({
#     "dataset_name": dataset_name,
#     "dataset_description":description,
#     "col_prompt":col_prompt
# })

print(ll)

# # Create a LangChain LLMChain
# llm_chain = LLMChain(llm=llm, prompt=prompt_template)

# # Execute the LLMChain with the generated prompt
# response = llm_chain.run(dataset_description=llm_prompt)
# print(response)



<s>[INST] You are an AI model tasked with generating synthetic data for the Dataset: "payments" dataset used in a travel agency. Below are the details of the columns for this dataset:
The Dataset description: This table tracks payment transactions related to customer bookings. It includes details on payment methods, transaction dates, and payment amounts.
- attribute:payment_id attribute name:Payment ID Type:serial Description:Unique identifier for each payment transaction. This is an auto-incrementing integer that serves as the primary key.
- attribute:booking_id attribute name:Booking ID Type:integer Description:The identifier linking the payment to a specific booking.
- attribute:amount attribute name:Payment Amount Type:numeric(10, 2) Description:The amount paid in the transaction.
- attribute:payment_method attribute name:Payment Method Type:varchar(50) Description:The method used for payment, such as credit card, PayPal, or bank transfer.
- attribute:transaction_date attribute na

In [66]:
from langchain_community.llms import Bedrock
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser


custom_llm = Bedrock(
    credentials_profile_name="bedrock-admin",
    # provider="cohere",
    model_id="mistral.mixtral-8x7b-instruct-v0:1",  # ARN like 'arn:aws:bedrock:...' obtained via provisioning the custom model
    model_kwargs={"temperature": 0.7, "top_p":0.7, "max_tokens":400, "top_k":50},
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],
)


chain = prompt_template | custom_llm | JsonOutputParser()

# mm = chain.invoke(input={
#     "dataset_name":dataset_name,
#     "dataset_description":description,
#     "col_prompt":col_prompt,
#     "items_count":items_count,
#     "format":format
# })



# ll = prompt_template.format(dataset_name=dataset_name, dataset_description=description, col_prompt=col_prompt, items_count=items_count, format=format)

# <s>[INST] You are an AI model tasked with generating synthetic data for the Dataset: "customers" dataset used in a travel agency. Below are the details of the columns for this dataset:
#     The Dataset description: This table contains detailed information about the customers who use the travel agency's services. It captures personal details, contact information, and preferences, which help the agency in providing personalized travel experiences.
#     - attribute:customer_id attribute name:Customer ID Type:serial Description:Unique identifier for each customer. This is an auto-incrementing integer that serves as the primary key.
# - attribute:first_name attribute name:First Name Type:varchar(255) Description:The first name of the customer, used for personalized communication and records.
# - attribute:last_name attribute name:Last Name Type:varchar(255) Description:The last name of the customer, helping to identify and distinguish customers.
# - attribute:email attribute name:Email Address Type:varchar(255) Description:The primary email address of the customer used for communication, including booking confirmations and promotional offers.
# - attribute:phone_number attribute name:Phone Number Type:varchar(20) Description:The customer's contact number, used for urgent communications and verification purposes.
# - attribute:date_of_birth attribute name:Date of Birth Type:date Description:The date of birth of the customer, useful for providing age-specific offers and travel insurance.
# - attribute:preferences attribute name:Travel Preferences Type:jsonb Description:Customer's travel preferences such as preferred destinations, travel class, and special requirements, aiding in personalized service delivery.
# - attribute:created_at attribute name:Record Created At Type:timestamp Description:Timestamp indicating when the customer record was created.
# - attribute:updated_at attribute name:Record Updated At Type:timestamp Description:Timestamp indicating the last update to the customer record. [/INST]</s> 
# [INST] Please generate a synthetic dataset in json format which would be array of key value pair, where key is the attribute and value is value, ensuring that the values conform to the descriptions and constraints provided for each column. The data should mimic the distribution, range, and format as closely as possible to real-world data. Ensure that referential integrity is maintained where applicable, and include a variety of values within the allowed constraints to reflect realistic data variability. Generate 5 items only and generate response in json only. [/INST]


# aws bedrock-runtime invoke-model \
# --model-id mistral.mixtral-8x7b-instruct-v0:1 \
# --body "{\"max_tokens\":400,\"top_p\":0.7,\"temperature\":0.7,\"top_k\":50,\"prompt\":\"<s>[INST] You are an AI model tasked with generating synthetic data for the Dataset: \\\"customers\\\" dataset used in a travel agency. Below are the details of the columns for this dataset:\\r\\n    The Dataset description: This table contains detailed information about the customers who use the travel agency's services. It captures personal details, contact information, and preferences, which help the agency in providing personalized travel experiences.\\r\\n    - attribute:customer_id attribute name:Customer ID Type:serial Description:Unique identifier for each customer. This is an auto-incrementing integer that serves as the primary key.\\r\\n- attribute:first_name attribute name:First Name Type:varchar(255) Description:The first name of the customer, used for personalized communication and records.\\r\\n- attribute:last_name attribute name:Last Name Type:varchar(255) Description:The last name of the customer, helping to identify and distinguish customers.\\r\\n- attribute:email attribute name:Email Address Type:varchar(255) Description:The primary email address of the customer used for communication, including booking confirmations and promotional offers.\\r\\n- attribute:phone_number attribute name:Phone Number Type:varchar(20) Description:The customer's contact number, used for urgent communications and verification purposes.\\r\\n- attribute:date_of_birth attribute name:Date of Birth Type:date Description:The date of birth of the customer, useful for providing age-specific offers and travel insurance.\\r\\n- attribute:preferences attribute name:Travel Preferences Type:jsonb Description:Customer's travel preferences such as preferred destinations, travel class, and special requirements, aiding in personalized service delivery.\\r\\n- attribute:created_at attribute name:Record Created At Type:timestamp Description:Timestamp indicating when the customer record was created.\\r\\n- attribute:updated_at attribute name:Record Updated At Type:timestamp Description:Timestamp indicating the last update to the customer record. [/INST]</s> \\n[INST] Please generate a synthetic dataset in json format which would be array of key value pair, where key is the attribute and value is value, ensuring that the values conform to the descriptions and constraints provided for each column. The data should mimic the distribution, range, and format as closely as possible to real-world data. Ensure that referential integrity is maintained where applicable, and include a variety of values within the allowed constraints to reflect realistic data variability. Generate 5 items only and generate response in json only. [/INST]\\n [\\n  {\\n    \\\"customer_id\\\": 1,\\n    \\\"first_name\\\": \\\"John\\\",\\n    \\\"last_name\\\": \\\"Doe\\\",\\n    \\\"email\\\": \\\"john.doe@example.com\\\",\\n    \\\"phone_number\\\": \\\"123-456-7890\\\",\\n    \\\"date_of_birth\\\": \\\"1980-01-01\\\",\\n    \\\"preferences\\\": '{\\\"preferred_destinations\\\": [\\\"Paris\\\", \\\"New York\\\"],\\\"travel_class\\\": \\\"business\\\",\\\"special_requirements\\\": null}',\\n    \\\"created_at\\\": \\\"2021-01-01T12:34:56Z\\\",\\n    \\\"updated_at\\\": \\\"2021-01-01T12:34:56Z\\\"\\n  },\\n  {\\n    \\\"customer_id\\\": 2,\\n    \\\"first_name\\\": \\\"Jane\\\",\\n    \\\"last_name\\\": \\\"Smith\\\",\\n    \\\"email\\\": \\\"jane.smith@example.com\\\",\\n    \\\"phone_number\\\": \\\"987-654-3210\\\",\\n    \\\"date_of_birth\\\": \\\"1995-03-15\\\",\\n    \\\"preferences\\\": '{\\\"preferred_destinations\\\": [\\\"Tokyo\\\", \\\"Rome\\\"],\\\"travel_class\\\": \\\"economy\\\",\\\"special_requirements\\\": {\\\"dietary_restrictions\\\": \\\"vegetarian\\\"}}',\\n    \\\"created_at\\\": \\\"2021-02-01T09:10:11Z\\\",\\n    \\\"updated_at\\\": \\\"2021-02-01T09:10:11Z\\\"\\n  },\"}" \
# --cli-binary-format raw-in-base64-out \
# --region ap-south-1 \
# invoke-model-output.txt

INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials


In [67]:
p = {
    "dataset_name":dataset_name,
    "dataset_description":description,
    "col_prompt":col_prompt,
    "items_count":items_count,
    "format":format
}

# type(prompt_template.format(**p))
ai_message.format(**p)

'<s>[INST] You are an AI model tasked with generating synthetic data for the Dataset: "payments" dataset used in a travel agency. Below are the details of the columns for this dataset:\nThe Dataset description: This table tracks payment transactions related to customer bookings. It includes details on payment methods, transaction dates, and payment amounts.\n- attribute:payment_id attribute name:Payment ID Type:serial Description:Unique identifier for each payment transaction. This is an auto-incrementing integer that serves as the primary key.\n- attribute:booking_id attribute name:Booking ID Type:integer Description:The identifier linking the payment to a specific booking.\n- attribute:amount attribute name:Payment Amount Type:numeric(10, 2) Description:The amount paid in the transaction.\n- attribute:payment_method attribute name:Payment Method Type:varchar(50) Description:The method used for payment, such as credit card, PayPal, or bank transfer.\n- attribute:transaction_date attri

In [68]:
import json
import logging
import boto3
import boto3.session
from botocore.exceptions import ClientError
from pydantic import BaseModel
from enum import Enum
import functools

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)


class BaseErrorCodes(Enum):
    AWS_SESSION_FAILED = (100, "AWSSessionFailed", "AWS Session creation failed")
    GENAI_RESPONSE_FAILED = (101, "GenAIResponseFailed", "Generative AI response generation failed")
    # Add more error codes as needed

    def __init__(self, code: int, id: str, desc: str):
        self.code = code
        self.id = id
        self.desc = desc


class BaseErrorResponse(BaseModel):
    error_message: str
    error_reason: str
    error_code: int
    error_name: str
    technical_error_description: str | None = None  # Optional field


class SolutionBaseException(Exception):

    def __init__(self, 
                 error_message: str, 
                 error_reason: str, 
                 error_code: int, 
                 error_name: str, 
                 technical_error_description: str | None = None):
        # Create APIErrorResponse object with the given parameters
        self.error_response = BaseErrorResponse(
            error_message=error_message,
            error_reason=error_reason,
            error_code=error_code,
            error_name=error_name,
            technical_error_description=technical_error_description
        )
        super().__init__(error_message)

    def __str__(self):
        return (
            f"Error Code: {self.error_response.error_code}, "
            f"Error Name: {self.error_response.error_name}, "
            f"Message: {self.error_response.error_message}, "
            f"Reason: {self.error_response.error_reason}, "
            f"Technical Description: {self.error_response.technical_error_description or 'N/A'}"
        )    



# Define the parameterized decorator
def log_metcustom_decorator(error: BaseErrorCodes):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            class_name = args[0].__class__.__name__  # Get the class name
            method_name = func.__name__  # Get the method name
            logger.info(f"Calling {class_name}.{method_name} with args: {args[1:]}, kwargs: {kwargs}")
            try:
                result = func(*args, **kwargs)  # Call the actual method
                return result
            except Exception as e:
                raise BaseErrorResponse(
                    error_message=error.desc,
                    error_reason=str(e),
                    error_code=error.code,
                    error_name=error.id,
                    technical_error_description="NA"
                )                
            # logger.info(f"{custom_message} - {class_name}.{method_name} returned: {result}")
        return wrapper
    return decorator


class BedrockGenAIModel:

    @log_metcustom_decorator(BaseErrorCodes.AWS_SESSION_FAILED)
    def __init__(self,
                 profile_name:str="bedrock-admin"
                 ):
        boto3_session = boto3.session.Session(
            profile_name = "bedrock-admin"
        )
        self.bedrock_client = boto3_session.client('bedrock-runtime')

    @log_metcustom_decorator(BaseErrorCodes.GENAI_RESPONSE_FAILED)
    def invoke_genai(   self,
                        prompt:str,
                        model_id:str = "mistral.mixtral-8x7b-instruct-v0:1",
                        max_tokens:int = 1000, 
                        temperature:float = 0.1, 
                        top_p:float = 0.7, 
                        top_k:float = 50
                    ):
        self.body = json.dumps({
            "prompt": prompt,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k
        })

        self.response = self.bedrock_client.invoke_model(
            body=self.body,
            # contentType='string',
            # accept='string',
            modelId='mistral.mixtral-8x7b-instruct-v0:1',
            # trace='ENABLED'|'DISABLED',
            # guardrailIdentifier='string',
            # guardrailVersion='string'
        )

        return self.body
    

    def response_text(self):
        return json.loads(self.response.get('body').read())

        

class GenAITextGenerator:

    def build_prompt(self, prompt_template:str, params:dict):
        return prompt_template.format(**params)
    

    def __init__(self, 
                 prompt_template:str, 
                 params:dict, 
                 max_tokens:int = 1000, 
                 temperature:float = 0.1, 
                 top_p:float = 0.7, 
                 top_k:float = 50):
        
        self.genai_client = BedrockGenAIModel(profile_name = "bedrock-admin")
        self.prompt = self.build_prompt(prompt_template, params)


    def generate_text(self, 
                        max_tokens:int = 1000, 
                        temperature:float = 0.1, 
                        top_p:float = 0.7, 
                        top_k:float = 50
                      ):

        # logger.info("Generating text with Mistral AI model %s", model_id)

        # bedrock = boto3.client(service_name='bedrock-runtime')

        response = self.genai_client.invoke_genai(
            prompt=self.prompt
        )

        print(response)

        return self.genai_client.response_text()




In [69]:
p = {
    "dataset_name":dataset_name,
    "dataset_description":description,
    "col_prompt":col_prompt,
    "items_count":items_count,
    "format":format
}

ai_message = '''<s>[INST] You are an AI model tasked with generating synthetic data for the Dataset: "{dataset_name}" dataset used in a travel agency. Below are the details of the columns for this dataset:
The Dataset description: {dataset_description}
{col_prompt} [/INST]</s>
[INST]Please generate a synthetic dataset in json format which would be array of key value pair, where key is the attribute and value is value, ensuring that the values conform to the descriptions and constraints provided for each column. The data should mimic the distribution, range, and format as closely as possible to real-world data. Ensure that referential integrity is maintained where applicable, and include a variety of values within the allowed constraints to reflect realistic data variability. Generate {items_count} items only and generate response in {format} only without any additional strings and escape sequences.[/INST]'''


genai_res = GenAITextGenerator(prompt_template=ai_message, params=p)

res = genai_res.generate_text()

INFO:__main__:Calling BedrockGenAIModel.__init__ with args: (), kwargs: {'profile_name': 'bedrock-admin'}
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
INFO:__main__:Calling BedrockGenAIModel.invoke_genai with args: (), kwargs: {'prompt': '<s>[INST] You are an AI model tasked with generating synthetic data for the Dataset: "payments" dataset used in a travel agency. Below are the details of the columns for this dataset:\nThe Dataset description: This table tracks payment transactions related to customer bookings. It includes details on payment methods, transaction dates, and payment amounts.\n- attribute:payment_id attribute name:Payment ID Type:serial Description:Unique identifier for each payment transaction. This is an auto-incrementing integer that serves as the primary key.\n- attribute:booking_id attribute name:Booking ID Type:integer Description:The identifier linking the payment to a specific booking.\n- attribute:amount attribute na

{"prompt": "<s>[INST] You are an AI model tasked with generating synthetic data for the Dataset: \"payments\" dataset used in a travel agency. Below are the details of the columns for this dataset:\nThe Dataset description: This table tracks payment transactions related to customer bookings. It includes details on payment methods, transaction dates, and payment amounts.\n- attribute:payment_id attribute name:Payment ID Type:serial Description:Unique identifier for each payment transaction. This is an auto-incrementing integer that serves as the primary key.\n- attribute:booking_id attribute name:Booking ID Type:integer Description:The identifier linking the payment to a specific booking.\n- attribute:amount attribute name:Payment Amount Type:numeric(10, 2) Description:The amount paid in the transaction.\n- attribute:payment_method attribute name:Payment Method Type:varchar(50) Description:The method used for payment, such as credit card, PayPal, or bank transfer.\n- attribute:transacti

In [70]:
res
# "\\_sdsd".replace("\\_", "")

{'outputs': [{'text': ' [\n{\n"payment\\_id": 1,\n"booking\\_id": 12345,\n"amount": 456.78,\n"payment\\_method": "credit card",\n"transaction\\_date": "2022-03-01 14:30:00",\n"status": "completed",\n"created\\_at": "2022-03-01 14:30:00",\n"updated\\_at": "2022-03-01 14:30:00"\n},\n{\n"payment\\_id": 2,\n"booking\\_id": 23456,\n"amount": 890.12,\n"payment\\_method": "PayPal",\n"transaction\\_date": "2022-03-02 09:15:00",\n"status": "pending",\n"created\\_at": "2022-03-02 09:15:00",\n"updated\\_at": "2022-03-02 09:15:00"\n},\n{\n"payment\\_id": 3,\n"booking\\_id": 34567,\n"amount": 333.00,\n"payment\\_method": "bank transfer",\n"transaction\\_date": "2022-03-03 16:45:00",\n"status": "completed",\n"created\\_at": "2022-03-03 16:45:00",\n"updated\\_at": "2022-03-03 16:45:00"\n},\n{\n"payment\\_id": 4,\n"booking\\_id": 45678,\n"amount": 111.55,\n"payment\\_method": "credit card",\n"transaction\\_date": "2022-03-04 11:00:00",\n"status": "failed",\n"created\\_at": "2022-03-04 11:00:00",\n"upd

In [71]:
json.loads(res.get('outputs')[0]['text'].replace("\\_", "_"))

[{'payment_id': 1,
  'booking_id': 12345,
  'amount': 456.78,
  'payment_method': 'credit card',
  'transaction_date': '2022-03-01 14:30:00',
  'status': 'completed',
  'created_at': '2022-03-01 14:30:00',
  'updated_at': '2022-03-01 14:30:00'},
 {'payment_id': 2,
  'booking_id': 23456,
  'amount': 890.12,
  'payment_method': 'PayPal',
  'transaction_date': '2022-03-02 09:15:00',
  'status': 'pending',
  'created_at': '2022-03-02 09:15:00',
  'updated_at': '2022-03-02 09:15:00'},
 {'payment_id': 3,
  'booking_id': 34567,
  'amount': 333.0,
  'payment_method': 'bank transfer',
  'transaction_date': '2022-03-03 16:45:00',
  'status': 'completed',
  'created_at': '2022-03-03 16:45:00',
  'updated_at': '2022-03-03 16:45:00'},
 {'payment_id': 4,
  'booking_id': 45678,
  'amount': 111.55,
  'payment_method': 'credit card',
  'transaction_date': '2022-03-04 11:00:00',
  'status': 'failed',
  'created_at': '2022-03-04 11:00:00',
  'updated_at': '2022-03-04 11:00:00'},
 {'payment_id': 5,
  'boo

In [58]:
import yaml

with open("config.yaml", 'r') as file:
    config = yaml.safe_load(file)

In [61]:
config["llm_config"]["bedrock"]["prompts"]["SYNTHETIC_DATA_GEN"].format(**p)

'"<s>[INST] You are an AI model tasked with generating synthetic data for the Dataset: "payments" dataset used in a travel agency. Below are the details of the columns for this dataset:\n The Dataset description: This table tracks payment transactions related to customer bookings. It includes details on payment methods, transaction dates, and payment amounts.\n - attribute:payment_id attribute name:Payment ID Type:serial Description:Unique identifier for each payment transaction. This is an auto-incrementing integer that serves as the primary key.\n- attribute:booking_id attribute name:Booking ID Type:integer Description:The identifier linking the payment to a specific booking.\n- attribute:amount attribute name:Payment Amount Type:numeric(10, 2) Description:The amount paid in the transaction.\n- attribute:payment_method attribute name:Payment Method Type:varchar(50) Description:The method used for payment, such as credit card, PayPal, or bank transfer.\n- attribute:transaction_date at

In [45]:
config

{'llm_config': {'type': 'ollama',
  'ollama': {'model': 'phi3:3.8b',
   'temperature': 0.5,
   'endpoint': 'http://localhost:11434'},
  'bedrock': {'model': 'amazon.titan-tg1-large',
   'region': 'us-east-1',
   'prompts': {'SYNTHETIC_DATA_GEN': '"<s>[INST] You are an AI model tasked with generating synthetic data for the Dataset: "{dataset_name}" dataset used in a travel agency. Below are the details of the columns for this dataset:\n The Dataset description: {dataset_description}\n {col_prompt} [/INST]</s>\n [INST]Please generate a synthetic dataset in json format which would be array of key value pair, where key is the attribute and value is value, ensuring that the values conform to the descriptions and constraints provided for each column. \n The data should mimic the distribution, range, and format as closely as possible to real-world data. \n Ensure that referential integrity is maintained where applicable, and include a variety of values within the allowed constraints to reflec

In [60]:
p

{'dataset_name': 'payments',
 'dataset_description': 'This table tracks payment transactions related to customer bookings. It includes details on payment methods, transaction dates, and payment amounts.',
 'col_prompt': '- attribute:payment_id attribute name:Payment ID Type:serial Description:Unique identifier for each payment transaction. This is an auto-incrementing integer that serves as the primary key.\n- attribute:booking_id attribute name:Booking ID Type:integer Description:The identifier linking the payment to a specific booking.\n- attribute:amount attribute name:Payment Amount Type:numeric(10, 2) Description:The amount paid in the transaction.\n- attribute:payment_method attribute name:Payment Method Type:varchar(50) Description:The method used for payment, such as credit card, PayPal, or bank transfer.\n- attribute:transaction_date attribute name:Transaction Date Type:timestamp Description:The date and time when the payment transaction occurred.\n- attribute:status attribute