# Prerequisites
- To run SynapseML in Fabric, you need an F64 capacity or higher.


In [1]:
# Import
from pyspark.sql import Row
from pyspark.sql.types import *
from pyspark.sql.functions import *
import pyspark.sql.functions as F

import synapse.ml.core
import synapse.ml.services
from synapse.ml.services.openai import *

StatementMeta(, 5b2f3051-57a9-4e75-b7ad-8a0784b1855f, 3, Finished, Available, Finished)

In [2]:
# Configuration: names of the Azure resources and model deployments used by this notebook
keyVaultURI = "https://sbdemokvfabic.vault.azure.net/"
openAIServiceName = "sbdemoaoai"
aiSearchServiceName = "sbdemoass002"
embeddingModelName = "demo-text-e3s"  # deployment name; model: text-embedding-3-small
chatModelName = "demo-gpt4"  # deployment name; model: gpt-4

# Retrieve the OpenAI and AI Search keys from Key Vault.
# Each secret is looked up under the name of the service it belongs to.
openAIkey = mssparkutils.credentials.getSecret(keyVaultURI, openAIServiceName)
aiSearchKey = mssparkutils.credentials.getSecret(keyVaultURI, aiSearchServiceName)

# NOTE(review): printing secrets is only safe if the runtime redacts them in the
# cell output — confirm before sharing this notebook.
print(openAIkey, aiSearchKey, sep="\n")

StatementMeta(, 5b2f3051-57a9-4e75-b7ad-8a0784b1855f, 4, Finished, Available, Finished)

[REDACTED]
[REDACTED]


In [3]:
# AI Search Index Config

import requests
import json

# REST API version passed as `api-version` on AI Search request URLs
aiSearchApiVersion = "2023-11-01"
# Name of the AI Search index that the questions are run against
aiSearchIndexName = "demo-customer-purchase-index"
# NOTE(review): presumably the vector dimension of the embedding model
# (text-embedding-3-small) — confirm against the index definition
embeddingLength = 1536

# AI Search - Document config
# NOTE(review): not referenced by the cells shown here; presumably the upload
# batch size used when the index was populated — confirm
docBatchSize = 250

StatementMeta(, 5b2f3051-57a9-4e75-b7ad-8a0784b1855f, 5, Finished, Available, Finished)

In [4]:
# Setup Chat

# The question must be embedded with the same model that embedded the data,
# so this service points at the embedding deployment configured above.
questionEmbeddingService = OpenAIEmbedding() \
    .setCustomServiceName(openAIServiceName) \
    .setSubscriptionKey(openAIkey) \
    .setDeploymentName(embeddingModelName)

# Chat completion service, bound to the chat model deployment.
chatService = OpenAIChatCompletion() \
    .setCustomServiceName(openAIServiceName) \
    .setSubscriptionKey(openAIkey) \
    .setDeploymentName(chatModelName)

StatementMeta(, 5b2f3051-57a9-4e75-b7ad-8a0784b1855f, 6, Finished, Available, Finished)

In [5]:
# Sample question for embedding
# Scenario: You are a new customer service rep. You have a customer on the phone inquiring about an order... 

# Question 1: Order Date May 04, 2024, Can you retrieve Order ID: 256909?

def createQuestionEmbedding(embeddingService, userQuestion):
    """Embed a single question and return the vector as a plain Python list.

    Args:
        embeddingService: Embedding transformer to run. Its text, error and
            output columns are (re)configured by this call.
        userQuestion: The question text to embed.

    Returns:
        The embedding, converted with ``.tolist()`` so it can be serialized
        into a JSON request body.

    Raises:
        RuntimeError: If the service produced no embedding. The message carries
            the content of the error column so the cause is visible.
    """
    questionColName = "question"
    errorColName = "p_error"
    embeddingColName = "p_embeddings"

    # One-row DataFrame: the transformer works on columns, not on single strings.
    questionDf = spark.createDataFrame([(1, userQuestion)], ["id", questionColName])

    resultRow = (
        embeddingService
            .setTextCol(questionColName)
            .setErrorCol(errorColName)
            .setOutputCol(embeddingColName)
            .transform(questionDf)
            .select(errorColName, embeddingColName)
            .first()
    )

    # A failed call leaves the output column null; surface the recorded error
    # instead of failing later on `None.tolist()`.
    if resultRow is None or resultRow[embeddingColName] is None:
        errorDetail = resultRow[errorColName] if resultRow is not None else "no rows returned"
        raise RuntimeError(f"Embedding request failed: {errorDetail}")

    return resultRow[embeddingColName].tolist()

# Question 1: keep both the text and its embedding, since the search request
# further down sends the text and the vector together.
userQuestion1 = "Order Date May 04, 2024, Can you retrieve Order ID: 256909?"
questionEmbedding1 = createQuestionEmbedding(questionEmbeddingService, userQuestion1)

StatementMeta(, 5b2f3051-57a9-4e75-b7ad-8a0784b1855f, 7, Finished, Available, Finished)

In [6]:
import json
import requests

# Based on # of Sales Items per day
# Sent to AI Search both as `top` and as the vector query's `k`.
numOfResults = 75


# Retrieve the top results based on the questionEmbedding
def retrieveTopResults(numOfResults, question, questionEmbedding, timeout=30):
    """Query the AI Search index with the question text and its embedding.

    The request combines a text `search` on the question with a vector query
    against the `Embeddings` field of the index.

    Args:
        numOfResults: Number of documents to return; used for both `top` and
            the vector query's `k`.
        question: The question text.
        questionEmbedding: The question's embedding as a JSON-serializable list.
        timeout: Seconds to wait for the service before giving up.

    Returns:
        The decoded JSON response body (the matches are under the "value" key).

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.Timeout: If the service does not answer within `timeout`.
    """
    url = f"https://{aiSearchServiceName}.search.windows.net/indexes/{aiSearchIndexName}/docs/search?api-version={aiSearchApiVersion}"

    payload = {
        "search": question,
        "top": numOfResults,
        "vectorQueries": [
            {
                "vector": questionEmbedding,
                "k": numOfResults,
                "fields": "Embeddings",
                "kind": "vector"
            }
        ]
    }

    headers = {
        "Content-Type": "application/json",
        "api-key": aiSearchKey,
    }

    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    # Fail here with the HTTP status rather than returning an error body that
    # only breaks later when the caller indexes ["value"].
    response.raise_for_status()

    return response.json()


# Query AI Search with question 1 and its embedding.
topResults1 = retrieveTopResults(numOfResults, userQuestion1, questionEmbedding1)


# Create a list using the content column
topContents1 = [ val["Content"] for val in topResults1["value"] ]

display(topResults1["value"])

# These are the top matches. Not all of them match the order number.

StatementMeta(, 5b2f3051-57a9-4e75-b7ad-8a0784b1855f, 8, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, b9e6e6c8-fb5e-4788-aea2-c862d6a15733)

In [7]:
# We can call Chat to create a response using the topContents

def getChatResponse(chatService, question, contents):
    """Ask the chat model a question, grounded on the retrieved contents.

    Args:
        chatService: Chat completion transformer to run. Its messages, error
            and output columns are (re)configured by this call.
        question: The user question, sent as the "user" message.
        contents: Retrieved contents, interpolated into the system prompt.

    Returns:
        A single string: the chat reply on success, or the collected error
        column contents if the service reported an error.
    """
    prompt = f"""
        context: You are a Customer Service Rep. {contents}
        Company Policy: 
          - Customer Service Rep, you only have permission to refund for Line Total < $150. 
          - Customer Service Supervisor can refund for Line Total < $350.
          - CS Manager is required to refund for any amount higher.

        Answer the question based on the context above.
        If the information is not relevent to the context, reply with "I don't know the answer.".
    """

    chatDf = spark.createDataFrame(
                [
                    (
                        [
                            Row(name="system", role="system", content=prompt),
                            Row(name="user", role="user", content=question),
                        ],
                    ),
                ],
                ["p_messages"]
            )

    # Collect exactly once: every Spark action on the transformed DataFrame
    # re-runs the transformer, i.e. sends another request to the chat service.
    resultRows = (
        chatService
            .setMessagesCol("p_messages")
            .setErrorCol("p_error")
            .setOutputCol("p_chatCompletions")
            .transform(chatDf)
            .select("p_error", "p_chatCompletions.choices.message.content")
            .collect()
    )

    # Compile the error messages. The error column is stringified as a whole
    # so this does not depend on the field names inside it.
    errorMessages = [str(r.p_error) for r in resultRows if r.p_error is not None]
    if errorMessages:
        return ' '.join(errorMessages)

    # `content` holds one entry per returned choice; skip missing entries.
    return ' '.join(
        ' '.join(text for text in (r.content or []) if text is not None)
        for r in resultRows
    )

# Ask the chat model question 1 using the retrieved contents, then show the reply.
chatResponse = getChatResponse(chatService, userQuestion1, topContents1)

print(chatResponse)

StatementMeta(, 5b2f3051-57a9-4e75-b7ad-8a0784b1855f, 9, Finished, Available, Finished)

Of course, I found multiple order lines under Order ID: 256909 on May 04, 2024:

1. Order Line ID (803933) sold 7 quantity of 'Alien officer hoodie (Black) 5XL' at $35.00 each. The Line total is $245.00.
2. Order Line ID (803932) sold 9 quantity of 'Ogre battery-powered slippers (Green) L' at $32.00 each. The Line total is $288.00.
3. Order Line ID (803934) sold 5 quantity of 'DBA joke mug - SELECT caffeine FROM mug (Black)' at $13.00 each. The Line total is $65.00.


In [8]:
# Question 2: a refund-permission question about the same order.
# Same steps as for question 1: embed the question, retrieve the top matches,
# then ask the chat model with those matches as context.
userQuestion2 = """Order Date May 04, 2024, Order ID: 256909. 
Customer Request: The customer wants to refund all the Alien hoodies

Can I issue an refund? If not, who do I need to obtain permission from? 
"""
questionEmbedding2 = createQuestionEmbedding(questionEmbeddingService, userQuestion2)
topResults2 = retrieveTopResults(numOfResults, userQuestion2, questionEmbedding2)
topContents2 = [ val["Content"] for val in topResults2["value"] ]

chatResponse2 = getChatResponse(chatService, userQuestion2, topContents2)

print(chatResponse2)

StatementMeta(, 5b2f3051-57a9-4e75-b7ad-8a0784b1855f, 10, Finished, Available, Finished)

The order in question involves two different products: 

- Line ID 803933 sold 7 quantity of 'Alien officer hoodie (Black) 5XL' with a line total of $245.00.
- Line ID 803934 sold 5 quantity of 'DBA joke mug - SELECT caffeine FROM mug (Black)' with a line total of $65.00.

To refund the 'Alien officer hoodie (Black) 5XL', you would need to obtain permission from a Customer Service Supervisor because the line total is $245, which is more than $150, but less than $350. 

But you, as a Customer Service Representative, can issue a refund for the 'DBA joke mug - SELECT caffeine FROM mug (Black)' because it falls under the line total of $150.


In [9]:
%%sql

-- Let's confirm the chat response against the processed purchase table for this order.
SELECT OrderID, OrderLineID, Description, ItemSoldTotal
FROM CS_Lakehouse.Processed_CustomerPurchase
WHERE OrderID = 256909;

StatementMeta(, 5b2f3051-57a9-4e75-b7ad-8a0784b1855f, 11, Finished, Available, Finished)

<Spark SQL result set with 3 rows and 4 fields>

# References
- [Azure AI Search - Search documents](https://learn.microsoft.com/en-us/rest/api/searchservice/search-documents?wt.mc_id=MVP_365600)
- [Microsoft Fabric - Chat with SynapseML library](https://learn.microsoft.com/en-us/fabric/data-science/ai-services/how-to-use-openai-sdk-synapse?tabs=synapseml&wt.mc_id=MVP_365600#chat)