In [2]:
import openai
import os
import langchain
print(langchain.__version__)
from langchain.prompts import PromptTemplate


0.0.323


In [6]:
##### Prompts #####
# Optional style constraints that can be appended to the task description as
# extra bullet points. Keys name the style; values are the instruction text
# shown to the model. Each text starts with a short label for the style.
constraints = {
    "short": """Short and Clear: Keep your queries short and straightforward. Cut down on words and skip parts of speech that aren't crucial, such as conjunctions and articles. It's okay to use fragmented phrases as long as they still convey the full meaning.""",
    "long": """Complex Sentence Structure: Ensure your queries are always in complete sentences. Opt for longer, more complex sentence structures, incorporating elements of speech like conjunctions and articles for fuller expression.""",
    "interrogative": """Interrogative Form: Queries should take the form of direct questions, using interrogative words like "who," "what," "where," "when," "why," and "how." This form directly signals a request for information.""",
    "directives": """Directives: Queries should be framed as directives or commands, such as "Tell me about…" or "Show me…". These are less interrogative but still clearly indicate a request for information.""",
    "formal": """Formal: Queries should be written in a formal tone, using proper grammar and avoiding slang or colloquialisms. This is the most appropriate tone for professional settings.""",
    # The label must match the style being described; it previously read
    # "Formal:" (copy-paste from the entry above), contradicting the body text.
    "casual": """Casual: Queries should be written in a casual tone, using informal grammar and colloquialisms. This is the most appropriate tone for casual settings.""",
}


# Prefix that turns an optional constraint text into one more bullet of the
# list inside the task description.
list_item_prefix = "\n* "

# Raw text of the system prompt. The two placeholders on the line after the
# second bullet receive the optional style constraints when the template is
# formatted.
task_description_ptt = """Convert the given SQL query with placeholders into human understandable text. Each placeholder is a combination of table and column names, formatted as "[Table.Column]", like "[User.Name]". 

The generation step should:
* Keep the original placeholders from the SQL queries intact.
* Create text that can be comprehended by people who have no knowledge of relational databases.
{short_or_long}{interrogative_or_directives}
* Generate three translations of text that are diverse in phrasing and syntax, each in a separate line 
* Not include any other text in your response.
"""

# Prompt template wrapping the raw text, with both placeholders declared.
task_description_pt = PromptTemplate(
    template=task_description_ptt,
    input_variables=["short_or_long", "interrogative_or_directives"],
)


In [7]:
# Load the SQL templates through the project-local `utils` helper.
# The result is consumed further down as a mapping of table name -> iterable of
# SQL template strings (it is iterated with .keys() / .items()).
from utils import read_template_from_txt_files
all_templates = read_template_from_txt_files()

Number of templates for each table:
Project 7
Capability 2
Employee 12
Company 6
Total number of templates:  27


{'Project': [], 'Capability': [], 'Employee': [], 'Company': []}

In [17]:
def generate(sys_msg, usr_msg, model_name = "gpt4-short", temperature=1):
    """Send one system/user message pair to the chat-completion endpoint.

    Args:
        sys_msg: Content of the system message.
        usr_msg: Content of the user message.
        model_name: Forwarded as both ``model`` and ``deployment_id``.
        temperature: Sampling temperature forwarded to the API. Defaults to 1,
            the value that used to be hard-coded, so existing calls behave
            the same.

    Returns:
        The object returned by ``openai.ChatCompletion.create``, unmodified.
    """
    messages = [
        {"role": "system", "content": sys_msg},
        {"role": "user", "content": usr_msg},
    ]
    response = openai.ChatCompletion.create(
        model=model_name,
        deployment_id=model_name,
        messages=messages,
        temperature=temperature,
    )
    return response

def save_templates_to_text(templates, directory="./templates/text/"):
    """Write ``templates`` to ``templates_to_be_extracted.txt`` in ``directory``.

    Args:
        templates: The text to write (a single string).
        directory: Target directory. It is created, including parents, when it
            does not exist yet. An empty string writes to the current
            working directory.

    Any existing file of that name is overwritten.
    """
    # Create the target directory up front so a fresh checkout does not fail
    # with FileNotFoundError; skip for "" because os.makedirs("") raises.
    if directory:
        os.makedirs(directory, exist_ok=True)
    filepath = os.path.join(directory, "templates_to_be_extracted.txt")
    # Explicit UTF-8: the prompt/template text may contain non-ASCII characters
    # and the platform default encoding is not guaranteed to handle them.
    with open(filepath, 'w', encoding="utf-8") as file:
        file.write(templates)

# System prompt for this run: only the "short" style constraint is enabled.
sys_msg = task_description_pt.format(
    short_or_long=list_item_prefix + constraints['short'],
    interrogative_or_directives="",
)

# Results are stored in a dictionary with the following structure:
# {
#     table1: {"sql_template1": [text1, text2],
#              "sql_template2": [text1, text2]},
#     table2: {"sql_template1": [text1, text2],
#              "sql_template2": [text1, text2]},
#     ...
# }
results = {table_name: None for table_name in all_templates.keys()}
for table_name, sql_templates in all_templates.items():
    texts_for_templates = {}
    for i, sql_template in enumerate(sql_templates):
        response = generate(sys_msg, sql_template)
        # Guard against a missing message body so the split below cannot fail.
        content = response.choices[0].message.content or ""
        # The prompt asks for one translation per line. Strip every line and
        # drop blank or whitespace-only ones so padding and "\r" never end up
        # in the stored texts.
        texts_for_templates[sql_template] = [
            line.strip() for line in content.splitlines() if line.strip()
        ]
        print(f"table: {table_name}, template: {i}")
    results[table_name] = texts_for_templates
    
    

table: Project, template: 0
table: Project, template: 1
table: Project, template: 2
table: Project, template: 3
table: Project, template: 4
table: Project, template: 5
table: Project, template: 6
table: Capability, template: 0
table: Capability, template: 1
table: Employee, template: 0
table: Employee, template: 1
table: Employee, template: 2
table: Employee, template: 3
table: Employee, template: 4
table: Employee, template: 5
table: Employee, template: 6
table: Employee, template: 7
table: Employee, template: 8
table: Employee, template: 9
table: Employee, template: 10
table: Employee, template: 11
table: Company, template: 0
table: Company, template: 1
table: Company, template: 2
table: Company, template: 3
table: Company, template: 4
table: Company, template: 5


In [18]:
# Persist the per-table translations collected above as a single JSON document.
import json
with open('templates/text/templates_for_each_table.json', 'w') as out_file:
    out_file.write(json.dumps(results))