In [None]:
# Install Vanna with its `snowflake` extra straight from the GitHub repository.
# NOTE(review): this tracks the `main` ref rather than a tag or commit, so two
# runs of this cell may install different code — consider pinning a version.
%pip install 'git+https://github.com/vanna-ai/vanna@main#egg=vanna[snowflake]'

In [None]:
import os
import prompt
import ai
from typing import Dict, List
from dotenv import load_dotenv
import pandas as pd
import vanna as vn
import time
# Read a local .env file (python-dotenv) into the process environment; this has
# to run before the os.environ[...] lookups used for the Vanna and Snowflake
# credentials later in the notebook.
load_dotenv()

In [None]:
# Load the evaluation questions and turn the row index produced by read_csv
# into an explicit `question_order` column, so results can be sorted back into
# question order later on.
questions_df = (
    pd.read_csv("../data/questions_sec.csv")
    .reset_index()
    .rename(columns={'index': 'question_order'})
)

In [None]:
# Sanity check: number of question rows that were loaded.
questions_df.shape[0]

In [None]:
# Register the Vanna API key first, then open the Snowflake connection.
# Every credential is read from the environment; a missing variable raises
# KeyError before any connection attempt is made.
vn.set_api_key(os.environ['VANNA_API_KEY'])

# Order matters: the values are passed positionally, in exactly this order.
SNOWFLAKE_ENV_VARS = (
    'SNOWFLAKE_ACCOUNT',
    'SNOWFLAKE_USER',
    'SNOWFLAKE_PASSWORD',
    'SNOWFLAKE_DATABASE',
)
vn.connect_to_snowflake(*(os.environ[name] for name in SNOWFLAKE_ENV_VARS))

In [None]:
vanna_models = ['cybersyn-sec-0', 'cybersyn-sec-3', 'cybersyn-sec-100']
# 'llama-2-70b-chat' is also supported by _generate_sql() below, but it is
# currently left out of the run.
foundational_models = ['gpt-3.5-turbo', 'gpt-4', 'code-bison@001']

OPENAI_CHAT_MODELS = ('gpt-3.5-turbo', 'gpt-4')

# First argument handed to the prompt builders.
# NOTE(review): presumably a prompt size limit (tokens or characters) — confirm
# against the `prompt` module.
PROMPT_LIMIT = 16000
PROMPT_LIMIT_LLAMA = 9000

# Pause after each question, i.e. once every foundational model has been tried
# for it. NOTE(review): presumably there to stay under API rate limits.
SECONDS_BETWEEN_QUESTIONS = 5


def _generate_sql(model, question, related):
    """Ask one foundational model to write SQL for `question`.

    Args:
        model: Name of the foundational model to query.
        question: Natural-language question to translate into SQL.
        related: Object returned by vn.get_related_training_data(); its
            `ddl`, `documentation` and `questions` attributes are forwarded
            to the prompt builders.

    Returns:
        Whatever the matching `ai.send_to_*` helper returns (the generated SQL).

    Raises:
        ValueError: If `model` has no matching backend here. Without this,
            an unknown model name would fall through every branch and the
            caller would end up running SQL produced for a different model.
    """
    context = (question, related.ddl, related.documentation, related.questions)

    if model in OPENAI_CHAT_MODELS:
        message_log = prompt.get_message_log_prompt(PROMPT_LIMIT, *context)
        return ai.send_to_openai_chat(model, message_log)

    if model == 'code-bison@001':
        message = prompt.get_single_message_prompt(PROMPT_LIMIT, *context)
        return ai.send_to_vertexai(message)

    if model == 'llama-2-70b-chat':
        message = prompt.get_single_message_prompt(PROMPT_LIMIT_LLAMA, *context)
        return ai.send_to_replicate(message)

    raise ValueError(f"Unsupported foundational model: {model}")


# One record per (vanna_model, question, foundational_model) attempt.
evaluation_data = []

for vanna_model in vanna_models:
    vn.set_model(vanna_model)

    for _, row in questions_df.iterrows():
        question = row['question']

        for model in foundational_models:
            print(f"{model} + {vanna_model}: {question}")

            # Start every attempt from a clean slate so that nothing from the
            # previous attempt can leak into this attempt's record.
            sql, df, error = None, None, None

            try:
                # Retrieval lives inside the try so that a failure here is
                # recorded for this attempt instead of aborting the whole run.
                related = vn.get_related_training_data(question)
                sql = _generate_sql(model, question, related)
            except Exception as e:
                # Retrieval or generation failed: there is no SQL to execute.
                print(e)
                error = str(e)
            else:
                try:
                    df = vn.run_sql(sql)
                except Exception as e:
                    # Keep the generated SQL in the record; only execution failed.
                    error = str(e)

            print(sql)

            evaluation_data.append({
                "question_order": row['question_order'],
                "question": question,
                "parameter_description": f"{model} + {vanna_model}",
                "foundational_model": model,
                "vanna_model": vanna_model,
                "sql": sql,
                "df_str": str(df),
                "error": error
            })

        time.sleep(SECONDS_BETWEEN_QUESTIONS)

In [None]:
# Turn the collected attempt records into a frame ordered by question, then by
# Vanna model, then by foundational model; persist it (row index included) and
# display it as the cell output.
sort_keys = ['question_order', 'vanna_model', 'foundational_model']
df = pd.DataFrame(evaluation_data)
df = df.sort_values(by=sort_keys)
df.to_csv("../data/sec_evaluation_data.csv")
df