# Setup (only run once)
1. Makes a copy of the required tables so the workshop still works if the source tables are deleted
2. Creates a Vector Search (VS) endpoint and index from a Delta table

In [0]:
%pip install -qqqq -U -r requirements.txt
# The line above quietly installs/upgrades everything listed in requirements.txt.
# The line below adds mlflow-skinny (unpinned) and databricks_langchain (pinned to 0.5.0).
%pip install mlflow-skinny databricks_langchain==0.5.0
# Restart the Python process so the freshly installed packages are loaded
# into the Python environment; run this cell before any other code cell.
dbutils.library.restartPython()

In [0]:
from mlflow.models import ModelConfig

# Load the agent configuration from the YAML file in the sibling 02_agent folder.
# Later cells read the 'catalog', 'schema' and 'retriever' entries from it.
config = ModelConfig(development_config="../02_agent/config.yml")
# Last expression of the cell: shows the loaded configuration as the cell output.
config.to_dict()

In [0]:
# Target catalog and schema for this setup, both taken from the shared config.
catalog_name, schema_name = config.get('catalog'), config.get('schema')

# Create the catalog first, then the schema inside it; both statements are
# no-ops when the object already exists.
spark.sql(f"CREATE CATALOG IF NOT EXISTS {catalog_name}")
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {catalog_name}.{schema_name}")

## Make a copy of the required tables
Work from private copies in case the source tables get deleted.

In [0]:
# Copy the source customer-service table into the configured catalog/schema.
# CREATE OR REPLACE does the swap in a single statement, so a failed copy does
# not leave the target table dropped (which DROP TABLE + CREATE TABLE could).
spark.sql(f"""
    CREATE OR REPLACE TABLE {catalog_name}.{schema_name}.cust_service_data AS
    SELECT * FROM retail_prod.agents.cust_service_data
""")

In [0]:
# Copy the source policies table into the configured catalog/schema.
# CREATE OR REPLACE does the swap in a single statement, so a failed copy does
# not leave the target table dropped (which DROP TABLE + CREATE TABLE could).
spark.sql(f"""
    CREATE OR REPLACE TABLE {catalog_name}.{schema_name}.policies AS
    SELECT * FROM retail_prod.agents.policies
""")

In [0]:
# Copy the source product-docs table into the configured catalog/schema.
# CREATE OR REPLACE does the swap in a single statement, so a failed copy does
# not leave the target table dropped (which DROP TABLE + CREATE TABLE could).
# NOTE(review): replacing in place, rather than dropping, should also keep the
# table usable by anything already attached to it (e.g. a Vector Search index
# that syncs from this table) — confirm against the retriever config.
spark.sql(f"""
    CREATE OR REPLACE TABLE {catalog_name}.{schema_name}.product_docs AS
    SELECT * FROM retail_prod.agents.product_docs
""")

In [0]:
# Inspect the table properties of the copied product_docs table.
# Uses the configured catalog/schema rather than a hard-coded table name, so the
# check always targets the table this notebook just created.
display(spark.sql(f"SHOW TBLPROPERTIES {catalog_name}.{schema_name}.product_docs"))

In [0]:
# Turn on the Delta change data feed for the copied product_docs table.
# NOTE(review): presumably required because the delta-sync Vector Search index
# created below uses this table as its source — confirm that the config's
# 'vs_source' points at {catalog}.{schema}.product_docs.
spark.sql(f"""
ALTER TABLE {catalog_name}.{schema_name}.product_docs SET TBLPROPERTIES (delta.enableChangeDataFeed = true)
""")

## Create the Vector Search endpoint and index from the source Delta table
For details, see the [Databricks Vector Search guide](https://docs.databricks.com/aws/en/generative-ai/create-query-vector-search).

In [0]:
from databricks.vector_search.client import VectorSearchClient

client = VectorSearchClient()

# Read the retriever settings once so the endpoint, source table and index
# names all come from the same config section.
retriever_config = config.get('retriever')
endpoint_name = retriever_config['vs_endpoint']
index_name = retriever_config['vs_index']

# Create the VS endpoint only if it does not already exist.
# The listing may come back without an 'endpoints' key (presumably when the
# workspace has no endpoints yet), so default to an empty list instead of
# raising KeyError.
endpoints = client.list_endpoints()
existing_endpoint_names = [ep.get('name') for ep in endpoints.get('endpoints', [])]
if endpoint_name not in existing_endpoint_names:
    # Wait for provisioning to finish so the index creation below does not
    # run against an endpoint that is not ready yet.
    client.create_endpoint_and_wait(
        name=endpoint_name,
        endpoint_type="STANDARD"
    )

# Create the VS index only if it does not already exist on the endpoint, so
# re-running this cell reuses the existing index instead of failing.
existing_index_names = [
    ix.get('name')
    for ix in client.list_indexes(endpoint_name).get('vector_indexes', [])
]
if index_name in existing_index_names:
    index = client.get_index(endpoint_name=endpoint_name, index_name=index_name)
else:
    # Delta-sync index: embeddings are computed from the 'product_doc' column
    # by the named embedding endpoint; TRIGGERED means syncs are started
    # explicitly rather than continuously.
    index = client.create_delta_sync_index(
        endpoint_name=endpoint_name,
        source_table_name=retriever_config['vs_source'],
        index_name=index_name,
        pipeline_type="TRIGGERED",
        primary_key="product_id",
        embedding_source_column="product_doc",
        embedding_model_endpoint_name="databricks-gte-large-en"
    )