# RAG (Retrieval-augmented generation) ➡️ Azure CosmosDB ❤️ + ChatGPT 3.5

### Criar um grupo de recursos
```bash
az login
az group create --name {resource_group_name} --location {region_name}
```
### Criar uma instância do CosmosDB
```bash
az cosmosdb create --name {cosmosdb_account_name} --resource-group {resource_group_name} --locations regionName={region_name} --default-consistency-level Eventual --kind GlobalDocumentDB
```
### Habilitar o Vector Search
```bash
 az cosmosdb update --resource-group {resource_group_name} --name {cosmosdb_account_name}  --capabilities EnableNoSQLVectorSearch
```

#### Dependências do projeto

In [None]:
#r "nuget:Azure.AI.OpenAI,1.0.0-beta.17"
#r "nuget:Newtonsoft.Json,13.0.3"
#r "nuget:Microsoft.Azure.Cosmos,3.47.0"
#r "nuget:Azure.Identity,1.13.2"
#r "nuget:DotNetEnv,2.5.0"


#### Bibliotecas e clientes de conexão com o CosmosDB e o Azure OpenAI

In [None]:
using System;
using System.Threading.Tasks;
using Azure.Identity;
using Microsoft.Azure.Cosmos;
using System.Collections.ObjectModel;
using DotNetEnv;
using Azure;
using Azure.AI.OpenAI;

// Load environment variables from the .env file.
Env.Load();

// Reads a required setting and fails fast, naming the missing variable.
// Without this, a missing variable is passed on as null and only surfaces
// as an opaque null-argument error inside new Uri(...) or a client constructor.
string RequireEnv(string variableName)
{
    var value = Environment.GetEnvironmentVariable(variableName);
    if (string.IsNullOrWhiteSpace(value))
    {
        throw new InvalidOperationException(
            $"Environment variable '{variableName}' is not set. Define it in the .env file or in the process environment.");
    }

    return value;
}

string EndpointUri = RequireEnv("COSMOSDB_ENDPOINT");

// NOTE(review): not used by any client created in this cell (they all use keys);
// kept because other cells may reference it.
var credential = new DefaultAzureCredential();

var PrimaryKey = RequireEnv("COSMOSDB_PRIMARY_KEY");
var OpenAIEndpoint = RequireEnv("OPENAI_ENDPOINT");
var OpenAIKey = RequireEnv("OPENAI_KEY");
var OpenAIEmbeddingEndpoint = RequireEnv("OPENAI_EMBEDDING_ENDPOINT");
var OpenAIEmbeddingKey = RequireEnv("OPENAI_EMBEDDING_KEY");

// Cosmos DB client authenticated with the account primary key.
var cosmosClient = new CosmosClient(EndpointUri, PrimaryKey);

// Separate clients for the chat endpoint and the embedding endpoint, each with its own key.
var openAIClient = new OpenAIClient(new Uri(OpenAIEndpoint), new AzureKeyCredential(OpenAIKey));
var openAIEmbeddingClient = new OpenAIClient(new Uri(OpenAIEmbeddingEndpoint), new AzureKeyCredential(OpenAIEmbeddingKey));
    

#### Chamada para o ChatGPT 3.5 no Azure OpenAI

In [None]:
// System prompt that frames the model as a recipe assistant.
// No retrieved documents are attached to it in this cell.
string _systemPromptRecipeAssistant = @"
        You are an intelligent assistant for Contoso Recipes. 
        You are designed to provide helpful answers to user questions about recipes.
  
        Instructions:
        - If you're unsure of an answer, you can say ""I don't know"" or ""I'm not sure"" and recommend users search themselves.        
        - Your response  should be complete. 
        - Assume the user is not an expert in cooking.
        - Format the content so that it can be printed to the Command Line console;";

// Question sent to the model. Alternative prompts to try:
//   "Do you know how to make hamburger?"
//   "You serve spicy food"
var userPrompt = "You serve Latin food?";

// Upper bound on the number of tokens the model may generate for the answer.
int _openAIMaxTokens = 100;

var systemMessage = new ChatRequestSystemMessage(_systemPromptRecipeAssistant);
var userMessage = new ChatRequestUserMessage(userPrompt);

var options = new ChatCompletionsOptions
{
    DeploymentName = "gpt-35-turbo",
    MaxTokens = _openAIMaxTokens,
    // Temperature controls the randomness of the generated text.
    Temperature = 0.5f,
    // NucleusSamplingFactor (also known as top-p sampling) is a second
    // parameter that controls the randomness of the generated text.
    NucleusSamplingFactor = 0.95f,
    FrequencyPenalty = 0,
    PresencePenalty = 0
};

// Conversation order: system instructions first, then the user question.
options.Messages.Add(systemMessage);
options.Messages.Add(userMessage);

var completionsResponse = await openAIClient.GetChatCompletionsAsync(options);
ChatCompletions completions = completionsResponse.Value;

// Print the first choice followed by the token accounting reported by the service.
Console.WriteLine($"Resposta: {completions.Choices[0].Message.Content}");
Console.WriteLine($"PromptTokens: {completions.Usage.PromptTokens}");
Console.WriteLine($"CompletionTokens: {completions.Usage.CompletionTokens}");

#### Objeto de entidade que representa as Receitas

In [None]:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

/// <summary>
/// Recipe document used throughout the notebook: deserialized from the recipe JSON files,
/// upserted into the Cosmos DB container, and used as the result type of the vector search query.
/// </summary>
/// <remarks>
/// Member names keep their original casing on purpose: they are the JSON property names
/// used for serialization, and the container policies reference <c>/id</c> and <c>/vectors</c>.
/// </remarks>
public class Recipe
{
    /// <summary>Document id; also the partition key value (the container is partitioned on <c>/id</c>).</summary>
    public string id { get; set; }

    public string name { get; set; }
    public string description { get; set; }

    /// <summary>Embedding stored under <c>/vectors</c>; null until an embedding has been assigned.</summary>
    public List<float> vectors { get; set; }

    public string cuisine { get; set; }
    public string difficulty { get; set; }
    public string prepTime { get; set; }
    public string cookTime { get; set; }
    public string totalTime { get; set; }
    public int servings { get; set; }
    public List<string> ingredients { get; set; }
    public List<string> instructions { get; set; }

    /// <summary>Filled from the <c>SimilarityScore</c> alias projected by the vector search query.</summary>
    public float SimilarityScore { get; set; }

    /// <summary>
    /// Builds a one-line, human-readable summary of the recipe.
    /// </summary>
    /// <returns>
    /// The summary text. A null <see cref="ingredients"/> or <see cref="instructions"/> list
    /// is rendered as an empty string instead of throwing.
    /// </returns>
    public string toString()
    {
        return $"Recipe: {name}, Cuisine: {cuisine}, Difficulty: {difficulty}, Prep Time: {prepTime}, Cook Time: {cookTime}, Total Time: {totalTime}, Servings: {servings}, Ingredients: {JoinOrEmpty(ingredients)}, Instructions: {JoinOrEmpty(instructions)}";
    }

    /// <summary>Conventional override so that formatting APIs produce the same summary as <see cref="toString"/>.</summary>
    public override string ToString() => toString();

    // string.Join throws on a null sequence; lists are null when the source JSON or the
    // query projection omits the field, so treat null as "no items".
    private static string JoinOrEmpty(List<string> items) =>
        items is null ? string.Empty : string.Join(", ", items);
}     


#### Criar um Container (tabela) no CosmosDB

[Documentação](https://learn.microsoft.com/pt-br/azure/cosmos-db/nosql/how-to-dotnet-get-started?wt.mc_id=AZ-MVP-5003638)

<img src="https://learn.microsoft.com/pt-br/azure/cosmos-db/nosql/media/how-to-dotnet-get-started/resource-hierarchy.svg" width="500"/>

In [None]:

// Container handle for the "recipes" container.
// It stays unassigned if creation fails, because the catch below only logs the error.
Container container;
try
{
    // Autoscale throughput with a maximum of 1000 RU/s.
    var throughputProperties = ThroughputProperties.CreateAutoscaleThroughput(1000);
    var containerName = "recipes";

    // Create the database if it does not exist.
    // Awaited rather than blocking on .Result.
    Database _database = await cosmosClient.CreateDatabaseIfNotExistsAsync("testdb", throughputProperties);

    // Container definition, including the vector embedding and indexing policies.
    ContainerProperties properties = new ContainerProperties(id: containerName, partitionKeyPath: "/id")
    {
        // Default time-to-live for items: 86400 seconds (1 day).
        DefaultTimeToLive = 86400,

        // Vector embedding policy: describes the vector stored under /vectors.
        VectorEmbeddingPolicy = new(
        new Collection<Embedding>(
            [
                new Embedding()
                {
                    Path = "/vectors",
                    DataType = VectorDataType.Float32,
                    DistanceFunction = DistanceFunction.Cosine,
                    Dimensions = 1536
                }
            ])),
        IndexingPolicy = new IndexingPolicy()
        {
            // Vector index policy for the same path.
            VectorIndexes = new()
            {
                new VectorIndexPath()
                {
                    Path = "/vectors",
                    Type = VectorIndexType.QuantizedFlat
                }
            }
        }
    };

    // Index every path except the vector path, which is covered by the vector index above.
    properties.IndexingPolicy.IncludedPaths.Add(new IncludedPath { Path = "/*" });
    properties.IndexingPolicy.ExcludedPaths.Add(new ExcludedPath { Path = "/vectors/*" });

    // Create the container and wait for the operation to finish.
    // The previous .GetAwaiter() call never waited, so creation could still be
    // running (or have failed unnoticed) when later cells used the container.
    await _database.CreateContainerIfNotExistsAsync(properties, throughputProperties);

    container = _database.GetContainer(containerName);
}
catch (Exception ex)
{
    Console.WriteLine($"An error occurred: {ex.Message}");
}

#### Geração dos Embeddings e inserção no Azure CosmosDB 

In [None]:
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
using Newtonsoft.Json;
using Azure.AI.OpenAI;
using Microsoft.Azure.Cosmos;
using Microsoft.Azure.Cosmos.Fluent;
using Microsoft.Azure.Cosmos.Serialization.HybridRow.Schemas;
using System.Collections.ObjectModel;
using System.ComponentModel;
using System.Diagnostics;
using Container = Microsoft.Azure.Cosmos.Container;
using PartitionKey = Microsoft.Azure.Cosmos.PartitionKey;
using Azure;
using Azure.Core;


// Load every recipe file from ../recipes, derive its document id from the name,
// generate an embedding for it and upsert it into the Cosmos DB container.
List<Recipe> recipes = new List<Recipe>();

foreach (var filePath in Directory.GetFiles("../recipes"))
{
    var jsonString = System.IO.File.ReadAllText(filePath);
    Recipe recipe = JsonConvert.DeserializeObject<Recipe>(jsonString);

    // An empty file deserializes to null, and the id is derived from the name,
    // so skip such files with a message instead of failing with a NullReferenceException.
    if (recipe?.name is null)
    {
        Console.WriteLine($"Skipping '{filePath}': no recipe name found.");
        continue;
    }

    // Id = lower-cased name without spaces. Invariant casing keeps ids
    // identical regardless of the machine's current culture.
    recipe.id = recipe.name.ToLowerInvariant().Replace(" ", "");
    recipes.Add(recipe);
}

foreach (Recipe recipe in recipes)
{
    Console.WriteLine(recipe.toString());

    // The whole recipe, serialized as JSON, is the text that gets embedded.
    EmbeddingsOptions embeddingsOptions = new()
    {
        DeploymentName = "text-embedding-ada-002",
        Input = { JsonConvert.SerializeObject(recipe) }
    };

    // Use the client configured for the embedding endpoint, not the chat client.
    var response = await openAIEmbeddingClient.GetEmbeddingsAsync(embeddingsOptions);
    Embeddings embeddings = response.Value;
    float[] embedding = embeddings.Data[0].Embedding.ToArray();
    recipe.vectors = embedding.ToList<float>();

    // Awaited rather than blocking on .Wait().
    await container.UpsertItemAsync(recipe, new PartitionKey(recipe.id));
}


### RAG (Retrieval-augmented generation)
#### Busca no CosmosDB pelo Vector Search

In [None]:
// Question to answer. Alternative prompts to try:
//   "Do you know how to make hamburger?"
//   "You serve spicy food"
var userPrompt = "You serve Latin food?";

// Minimum score a recipe must reach to be returned by the query below.
var similarityScore = 0.70;

// Embed the user question with the same deployment used for the stored recipes.
EmbeddingsOptions embeddingsOptions = new()
{
    DeploymentName = "text-embedding-ada-002",
    Input = {userPrompt}
};

// Use the client configured for the embedding endpoint, not the chat client.
var vectors = await openAIEmbeddingClient.GetEmbeddingsAsync(embeddingsOptions);

Embeddings embeddings = vectors.Value;
float[] embedding = embeddings.Data[0].Embedding.ToArray();

// Top 3 recipes whose score against the question embedding exceeds the threshold.
// c.instructions is projected so the retrieved documents actually contain the cooking steps.
string queryText = @"SELECT Top 3 c.name,c.description, c.ingredients, c.instructions, c.cuisine, c.difficulty, c.prepTime, c.cookTime, c.totalTime, c.servings,
                        VectorDistance(c.vectors, @vectors) AS SimilarityScore
                        FROM c
                        WHERE VectorDistance(c.vectors, @vectors) > @similarityScore 
                        ORDER BY VectorDistance(c.vectors, @vectors)";

var queryDef = new QueryDefinition(
        query: queryText)
    .WithParameter("@vectors", embedding.ToList<float>())
    .WithParameter("@similarityScore", similarityScore);

FeedIterator<Recipe> resultSet = container.GetItemQueryIterator<Recipe>(queryDefinition: queryDef);

// Drain every page of results; the iterator is disposed once it has been read.
List<Recipe> recipes = new List<Recipe>();
using (resultSet)
{
    while (resultSet.HasMoreResults)
    {
        FeedResponse<Recipe> response = await resultSet.ReadNextAsync();
        recipes.AddRange(response);
    }
}

recipes.ForEach(recipe => Console.WriteLine($"Name: {recipe.name}, SimilarityScore: {recipe.SimilarityScore}"));


#### Chamada para o ChatGPT 3.5 incluindo os documentos encontrados no Azure CosmosDB no contexto

In [None]:
// System prompt for the retrieval-augmented call.
// The serialized recipes are appended directly after this text.
string _systemPromptRecipeAssistant = @"
        You are an intelligent assistant for Contoso Recipes. 
        You are designed to provide helpful answers to user questions about 
        recipes, cooking instructions provided in JSON format below.
  
        Instructions:
        - Only answer questions related to the recipe provided below,
        - Don't reference any recipe not provided below.
        - If you're unsure of an answer, you can say ""I don't know"" or ""I'm not sure"" and recommend users search themselves.        
        - Your response  should be complete. 
        - List the Name of the Recipe at the start of your response folowed by step by step cooking instructions
        - Assume the user is not an expert in cooking.
        - Format the content so that it can be printed to the Command Line console;
        - In case there are more than one recipes you find let the user pick the most appropiate recipe.";

// Upper bound on the number of tokens the model may generate for the answer.
int _openAIMaxTokens = 100;

// The recipes found by the vector search, as JSON, become the grounding context.
var documents = JsonConvert.SerializeObject(recipes);
var systemMessage = new ChatRequestSystemMessage(string.Concat(_systemPromptRecipeAssistant, documents));
var userMessage = new ChatRequestUserMessage(userPrompt);

var options = new ChatCompletionsOptions
{
    DeploymentName = "gpt-35-turbo",
    MaxTokens = _openAIMaxTokens,
    Temperature = 0.5f,
    NucleusSamplingFactor = 0.95f,
    FrequencyPenalty = 0,
    PresencePenalty = 0
};

// Conversation order: system instructions plus documents first, then the user question.
options.Messages.Add(systemMessage);
options.Messages.Add(userMessage);

var completionsResponse = await openAIClient.GetChatCompletionsAsync(options);
ChatCompletions completions = completionsResponse.Value;

// Print the first choice followed by the token accounting reported by the service.
Console.WriteLine($"Resposta: {completions.Choices[0].Message.Content}");
Console.WriteLine($"PromptTokens: {completions.Usage.PromptTokens}");
Console.WriteLine($"CompletionTokens: {completions.Usage.CompletionTokens}");
