In [35]:
import openai
import os
import json
import langchain
print(langchain.__version__)
from langchain.prompts import PromptTemplate


0.0.323


In [37]:
##### Prompts #####

# Load the available linguistic constraints (constraint name -> text that is
# later inserted into the prompt as an extra bullet item).
# JSON files are UTF-8 by specification, so the encoding is given explicitly
# instead of relying on the platform's default text encoding.
with open("linguistic_constraints.json", "r", encoding="utf-8") as f:
    constraints = json.load(f)
print("Select from the available linguistic constraints:", ", ".join(constraints.keys()))

# Raw text of the system prompt. The two {placeholders} that share one line are
# filled when the template is formatted: each receives either an extra "* ..."
# bullet item or an empty string.
task_description_ptt = """Convert the given SQL query with placeholders into human understandable text. Each placeholder is a combination of table and column names, formatted as "[Table.Column]", like "[User.Name]". 

The generation step should:
* Keep the original placeholders from the SQL queries intact.
* Create text that can be comprehended by people who have no knowledge of relational databases.
{short_or_long}{interrogative_or_directives}
* Generate three translations of text that are diverse in phrasing and syntax, each in a separate line 
* Not include any other text in your response.
"""
# Prepended to an optional constraint so that it is rendered as its own bullet
# item on a new line of the prompt.
list_item_prefix = "\n* "
# Wrap the raw text in a LangChain PromptTemplate so the two variables can be
# substituted with .format(...).
task_description_pt = PromptTemplate(input_variables=['short_or_long', 'interrogative_or_directives'], template=task_description_ptt)


Select from the available linguistic constraints: short, long, interrogative, directives, formal, casual


In [7]:
# Load the SQL templates through the project-local helper in utils; the helper
# also prints the number of templates per table as a side effect.
# NOTE(review): all_templates is consumed as {table_name: [sql_template, ...]}
# by generate_texts — confirm against the helper's implementation.
from utils import read_template_from_txt_files
all_templates = read_template_from_txt_files()

Number of templates for each table:
Project 7
Capability 2
Employee 12
Company 6
Total number of templates:  27


In [29]:
def generate(sys_msg, usr_msg, model_name = "gpt4-short", temperature = 1):
    """Send one system/user message pair to the chat-completion endpoint.

    Args:
        sys_msg: Content of the system message.
        usr_msg: Content of the user message.
        model_name: Value passed as both ``model`` and ``deployment_id``.
            NOTE(review): presumably the name of an Azure OpenAI deployment —
            confirm against the account configuration.
        temperature: Sampling temperature forwarded to the API. Defaults to 1,
            the value that used to be hard-coded, so existing calls behave
            exactly as before.

    Returns:
        The response object returned by ``openai.ChatCompletion.create``,
        unmodified.
    """
    messages = [
        {"role": "system", "content": sys_msg},
        {"role": "user", "content": usr_msg},
    ]
    response = openai.ChatCompletion.create(
        model=model_name,
        deployment_id=model_name,
        messages=messages,
        temperature=temperature,
    )
    return response

def generate_texts(table_name, all_templates, task_description_prompt):
    """Generate texts for every SQL template of every table.

    Args:
        table_name: Not used. Every table in ``all_templates`` is processed
            regardless of this value; the parameter is kept only so that
            existing calls keep working.
        all_templates: Mapping of table name -> iterable of SQL template
            strings.
        task_description_prompt: the prompt for the task description used for
            system message

    Returns:
        A dictionary with the following structure::

            {
                "table1": {"sql_template1": [text1, text2, ...],
                           "sql_template2": [text1, text2, ...]},
                "table2": {"sql_template1": [text1, text2, ...],
                           "sql_template2": [text1, text2, ...]},
                ...
            }

        Each text is one non-blank line of the model's reply with surrounding
        whitespace removed.
    """
    results = {}
    # The loop variable has its own name so it no longer shadows the
    # (unused) `table_name` parameter.
    for current_table, sql_templates in all_templates.items():
        texts_for_templates = {}
        for i, sql_template in enumerate(sql_templates):
            response = generate(task_description_prompt, sql_template)
            content = response.choices[0].message.content
            # Strip each line and drop lines that are empty or whitespace-only,
            # so stray spaces or "\r" in the reply are not counted as texts.
            stripped_lines = (line.strip() for line in content.split("\n"))
            texts_for_templates[sql_template] = [line for line in stripped_lines if line]
            print(f"table: {current_table}, template: {i}")
        results[current_table] = texts_for_templates

    return results

In [30]:
# Build the system prompt with the "long" constraint as an extra bullet item;
# no interrogative/directive constraint is added for this run.
task_description_prompt = task_description_pt.format(short_or_long=list_item_prefix+ constraints['long'], interrogative_or_directives="")
# generate_texts processes every table in all_templates and does not use its
# first argument, so None is passed: no `table_name` variable is defined at
# notebook level, and referencing one raises NameError on a fresh kernel.
results = generate_texts(None, all_templates, task_description_prompt)
# save results to json file
with open('templates/text/long_for_each_table.json', 'w') as fp:
    json.dump(results, fp)

table: Project, template: 0
table: Project, template: 1
table: Project, template: 2
table: Project, template: 3
table: Project, template: 4
table: Project, template: 5
table: Project, template: 6
table: Capability, template: 0
table: Capability, template: 1
table: Employee, template: 0
table: Employee, template: 1
table: Employee, template: 2
table: Employee, template: 3
table: Employee, template: 4
table: Employee, template: 5
table: Employee, template: 6
table: Employee, template: 7
table: Employee, template: 8
table: Employee, template: 9
table: Employee, template: 10
table: Employee, template: 11
table: Company, template: 0
table: Company, template: 1
table: Company, template: 2
table: Company, template: 3
table: Company, template: 4
table: Company, template: 5


In [31]:
# read results from json file
# NOTE(review): no cell shown in this notebook writes
# short_for_each_table.json — presumably it comes from an earlier run with the
# 'short' constraint; confirm the file exists before running this cell.
with open('templates/text/short_for_each_table.json', 'r') as fp:
    short_results = json.load(fp)
with open('templates/text/long_for_each_table.json', 'r') as fp:
    long_results = json.load(fp)
    

In [33]:
def print_stat(results):
    """Print how many texts were generated per table and in total.

    Args:
        results: Nested mapping of the form
            {table_name: {"sql_template": [text1, text2], ...}, ...}
    """
    grand_total = 0
    for table_name, texts_by_template in results.items():
        # Sum the number of texts over all templates of this table.
        table_count = sum(len(text_list) for text_list in texts_by_template.values())
        print(f"The number of generated texts for {table_name}: ", table_count)
        print("\n")
        grand_total += table_count
    print(f"The total number of generated texts: {grand_total}")

# Report the per-table and overall number of texts in the short-constraint results.
print_stat(short_results)

The number of generated texts for Project:  21


The number of generated texts for Capability:  6


The number of generated texts for Employee:  36


The number of generated texts for Company:  18


The total number of generated texts: 81
