<a href="https://colab.research.google.com/github/walkerataide/my-first-llm/blob/main/Embedding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
!pip install -U -q google-generativeai

In [28]:
#Importações e configurações iniciais
#biblioteca para trabalhar com estruturas de vetores e outros processamentos matemáticos (fazer calculos)
import numpy as np
#biblioteca para manipular dataframes (planilhas)
import pandas as pd
import google.generativeai as genai
from google.colab import userdata
# Fetch the API key through Colab's `userdata` helper and configure the SDK with it.
GOOGLE_API_KEY = userdata.get("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

In [4]:
# List the models that support embeddings (vector representations of text that
# carry its semantic context): keep only those advertising "embedContent".
embedding_model_names = (
    candidate.name
    for candidate in genai.list_models()
    if "embedContent" in candidate.supported_generation_methods
)
for model_name in embedding_model_names:
  print(model_name)

models/embedding-001
models/text-embedding-004


In [10]:
# Embedding example: embed one titled document and print the raw response.
title = "Introdução à API Gemini: Python"

# The document body is assembled line by line; the empty entry produces the
# blank line between the "Artigo Completo" header and the article text.
sample_text = "\n".join([
    "Título: Introdução à API Gemini: Python",
    "Artigo Completo: ",
    "",
    "Este guia de início rápido demonstra como usar o SDK do Python para a API Gemini, que permite acesso aos modelos de linguagem grandes do Gemini do Google. Neste guia de início rápido, você aprenderá a:",
])

embeddings = genai.embed_content(
    model="models/embedding-001",
    content=sample_text,
    title=title,
    task_type="RETRIEVAL_DOCUMENT",
)

print(embeddings)

{'embedding': [0.017090669, -0.06622303, -0.036176864, 0.014726001, 0.05061557, -0.002303081, -0.005754607, -0.03724353, 0.04574199, 0.045256417, -0.025524141, -0.02382007, -0.06628093, -0.0771351, -0.00766119, 0.037121113, -0.004154374, 0.019611185, -0.0028708973, -0.014733275, -0.021901194, -0.011241951, -0.012981975, -0.078203425, -0.017130392, 0.0012825408, 0.009685343, 0.007894106, -0.018038895, 0.055251967, -0.012097395, 0.013593106, -0.013998032, 0.027272416, 0.0035586148, -0.07217318, -0.037143722, -0.057698663, 0.009278778, 0.009554872, 0.020708432, -0.05227365, -0.008571614, -0.027300574, -0.03612489, -0.0064564943, 0.04598813, 0.044644155, 0.021682138, -0.020614274, 0.022049773, 0.02238689, 0.056816936, -0.026940772, 0.009528412, -0.024676992, 0.056384187, -0.05050221, -0.017809328, 0.016222706, 0.02685331, 0.027858004, -0.006395875, 0.06989487, 0.03268556, -0.04308851, -0.010352005, -0.020169042, 0.057083312, -0.018273894, 0.02480381, -0.022713825, 0.0063875853, -0.04188260

In [29]:
# Documents that will be searched. Each one is a dict with a "title" and a
# "content" entry; `documents` collects them in order.
DOCUMENT1 = dict(
    title="Operating the Climate Control System",
    content="Your Googlecar has a climate control system that allows you to adjust the temperature and airflow in the car. To operate the climate control system, use the buttons and knobs located on the center console.  Temperature: The temperature knob controls the temperature inside the car. Turn the knob clockwise to increase the temperature or counterclockwise to decrease the temperature. Airflow: The airflow knob controls the amount of airflow inside the car. Turn the knob clockwise to increase the airflow or counterclockwise to decrease the airflow. Fan speed: The fan speed knob controls the speed of the fan. Turn the knob clockwise to increase the fan speed or counterclockwise to decrease the fan speed. Mode: The mode button allows you to select the desired mode. The available modes are: Auto: The car will automatically adjust the temperature and airflow to maintain a comfortable level. Cool: The car will blow cool air into the car. Heat: The car will blow warm air into the car. Defrost: The car will blow warm air onto the windshield to defrost it.",
)
DOCUMENT2 = dict(
    title="Touchscreen",
    content="Your Googlecar has a large touchscreen display that provides access to a variety of features, including navigation, entertainment, and climate control. To use the touchscreen display, simply touch the desired icon.  For example, you can touch the \"Navigation\" icon to get directions to your destination or touch the \"Music\" icon to play your favorite songs.",
)
DOCUMENT3 = dict(
    title="Shifting Gears",
    content="Your Googlecar has an automatic transmission. To shift gears, simply move the shift lever to the desired position.  Park: This position is used when you are parked. The wheels are locked and the car cannot move. Reverse: This position is used to back up. Neutral: This position is used when you are stopped at a light or in traffic. The car is not in gear and will not move unless you press the gas pedal. Drive: This position is used to drive forward. Low: This position is used for driving in snow or other slippery conditions.",
)

documents = [DOCUMENT1, DOCUMENT2, DOCUMENT3]

In [30]:
# Build the documents DataFrame with pandas and relabel its two columns
# positionally (accent-free names are used on purpose).
df = pd.DataFrame(documents).set_axis(["Titulo", "Conteudo"], axis=1)
df

Unnamed: 0,Titulo,Conteudo
0,Operating the Climate Control System,Your Googlecar has a climate control system th...
1,Touchscreen,Your Googlecar has a large touchscreen display...
2,Shifting Gears,Your Googlecar has an automatic transmission. ...


In [31]:
# Name of the embedding model; read as a global by embed_fn and passed
# explicitly to gerar_buscar_consulta in the cells below.
model = "models/embedding-001"

In [32]:
def embed_fn(title, text):
  """Return the embedding vector for a single titled document.

  Calls `genai.embed_content` with the notebook-level `model`, the given
  `text` as content, the given `title`, and the "RETRIEVAL_DOCUMENT" task
  type, then returns the value stored under the response's "embedding" key.
  """
  request = {
      "model": model,
      "content": text,
      "title": title,
      "task_type": "RETRIEVAL_DOCUMENT",
  }
  response = genai.embed_content(**request)
  return response["embedding"]

In [38]:
# Add an "Embeddings" column: embed every document row by row and store the
# resulting vector next to its title and content.
def _embed_row(row):
  """Embed one DataFrame row using its "Titulo" and "Conteudo" values."""
  return embed_fn(row["Titulo"], row["Conteudo"])

df["Embeddings"] = df.apply(_embed_row, axis=1)
df

Unnamed: 0,Titulo,Conteudo,Embeddings
0,Operating the Climate Control System,Your Googlecar has a climate control system th...,"[-0.033361107, -0.021217084, -0.049581926, -0...."
1,Touchscreen,Your Googlecar has a large touchscreen display...,"[0.009660736, -0.030662702, -0.017281422, -0.0..."
2,Shifting Gears,Your Googlecar has an automatic transmission. ...,"[-0.04270796, -0.007160868, -0.03242516, -0.02..."


In [39]:
def gerar_buscar_consulta(consulta, base, model):
  """Return the content of the document in `base` that best matches `consulta`.

  Args:
    consulta: Query text to embed.
    base: DataFrame with an "Embeddings" column (one vector per row) and a
      "Conteudo" column holding the document text.
    model: Embedding model name passed to `genai.embed_content`.

  Returns:
    The "Conteudo" value of the row whose embedding has the largest dot
    product with the query embedding.

  Raises:
    ValueError: If `base` has no rows.
  """
  if len(base) == 0:
    raise ValueError("base must contain at least one embedded document")

  embedding_consulta = genai.embed_content(model=model,
                               content=consulta,
                               task_type="RETRIEVAL_QUERY")["embedding"]

  # Dot product between the query vector and every document vector. The
  # vectors come from the `base` argument (not the notebook-level `df`), so the
  # function searches whatever frame the caller passes in.
  produtos_escalares = np.dot(np.stack(base["Embeddings"].tolist()), embedding_consulta)

  indice = np.argmax(produtos_escalares)  # position of the highest similarity
  return base.iloc[indice]["Conteudo"]

In [45]:
# Look up the stored document that best matches the question and print it.
consulta = "How can i shift gears of google car?"
resultado = gerar_buscar_consulta(consulta=consulta, base=df, model=model)
print(resultado)

Your Googlecar has an automatic transmission. To shift gears, simply move the shift lever to the desired position.  Park: This position is used when you are parked. The wheels are locked and the car cannot move. Reverse: This position is used to back up. Neutral: This position is used when you are stopped at a light or in traffic. The car is not in gear and will not move unless you press the gas pedal. Drive: This position is used to drive forward. Low: This position is used for driving in snow or other slippery conditions.


In [51]:
# RAG: combine the locally retrieved document with the generative model so the
# answer is grounded in that text, with the aim of avoiding hallucinations.
model_2 = genai.GenerativeModel("gemini-1.0-pro")

# The retrieved passage (`resultado`) is embedded directly in the instruction.
prompt = f"Rewrite this text in a creative way, without adding information that isn`t into the text: {resultado} translate it to brazilian portuguese"

response = model_2.generate_content(prompt)
print(response.text)

Seu Carango do Google tem uma transmissão automática. Para trocar de marcha, simplesmente mova a alavanca de câmbio para a posição desejada. Estacionado: Esta posição é usada quando você está estacionado. As rodas são travadas e o carro não pode se mover. Ré: esta posição é usada para dar ré. Neutro: esta posição é usada quando você está parado em um sinal ou no trânsito. O carro não está engatado e não se moverá a menos que você pressione o pedal do acelerador. Dirigir: esta posição é usada para dirigir para frente. Baixa: esta posição é usada para dirigir na neve ou em outras condições escorregadias.




---



Exemplos do site da documentação do Google:
https://ai.google.dev/gemini-api/docs/get-started/python?hl=pt-br#use_embeddings

In [6]:
text = "Hello world"
result = genai.embed_content(
    model="models/embedding-001",
    content=text,
)

# The response is a dict; the vector is stored under its "embedding" key.
print(result["embedding"])

[0.04703258, -0.040190056, -0.029026963, -0.026809642, 0.018920582, -8.3654784e-05, 0.031116402, -0.019520544, 0.0114913415, 0.009625779, 0.04571186, 0.05170951, -0.007854084, -0.07627559, -0.00073652336, -0.02259244, 0.01149677, -0.00761096, 0.006400746, -0.0036826304, -8.6395165e-05, 0.007910556, -0.031401973, -0.027668774, 0.0131483, 0.005762955, -0.0022430476, -0.07029421, 0.007011013, 0.07013052, -0.047634568, 0.008311825, -0.060211696, 0.016431302, 0.042709153, -0.047674265, 0.03426082, 0.021967327, -0.0070651034, 0.00032590108, 0.013825696, -0.08921293, -0.03404069, -0.03793646, 0.059349738, -0.0044174152, 0.015472682, -0.0061533544, 0.022183485, -0.08739371, 0.049185753, 0.025158774, 0.044854913, -0.022910612, 0.02060697, -0.016286727, 0.07367813, 0.013565082, -0.06963922, -0.002877564, 0.02369202, 0.0143784685, -0.012660949, 0.06607742, -0.00069232617, -0.017637717, -0.06946077, 0.042905096, 0.03502765, -0.029362002, 0.0069921436, -0.03341513, 0.036520302, -0.039816536, -0.025

In [7]:
# Number of values in the embedding vector (the saved output shows 768).
embedding_size = len(result["embedding"])
print(embedding_size)

768


In [8]:
text2 = "Embedding é uma técnica usada para representar informações como uma lista de números de ponto flutuante em uma matriz. Com o Gemini, é possível representar texto (palavras, frases e blocos de texto) de forma vetorial, facilitando a comparação e o contraste de embeddings. Por exemplo, dois textos que compartilham um assunto ou sentimento semelhante devem ter embeddings semelhantes, que podem ser identificados por meio de técnicas matemáticas de comparação, como a similaridade de cossenos. Para mais informações sobre como e por que usar embeddings, consulte o Guia de embeddings."
result = genai.embed_content(
    model="models/embedding-001",
    content=text2,
)

# Pull the vector out of the response dict, then print it and its length.
embedding_values = result["embedding"]
print(embedding_values)
print(len(embedding_values))

[0.04703258, -0.040190056, -0.029026963, -0.026809642, 0.018920582, -8.3654784e-05, 0.031116402, -0.019520544, 0.0114913415, 0.009625779, 0.04571186, 0.05170951, -0.007854084, -0.07627559, -0.00073652336, -0.02259244, 0.01149677, -0.00761096, 0.006400746, -0.0036826304, -8.6395165e-05, 0.007910556, -0.031401973, -0.027668774, 0.0131483, 0.005762955, -0.0022430476, -0.07029421, 0.007011013, 0.07013052, -0.047634568, 0.008311825, -0.060211696, 0.016431302, 0.042709153, -0.047674265, 0.03426082, 0.021967327, -0.0070651034, 0.00032590108, 0.013825696, -0.08921293, -0.03404069, -0.03793646, 0.059349738, -0.0044174152, 0.015472682, -0.0061533544, 0.022183485, -0.08739371, 0.049185753, 0.025158774, 0.044854913, -0.022910612, 0.02060697, -0.016286727, 0.07367813, 0.013565082, -0.06963922, -0.002877564, 0.02369202, 0.0143784685, -0.012660949, 0.06607742, -0.00069232617, -0.017637717, -0.06946077, 0.042905096, 0.03502765, -0.029362002, 0.0069921436, -0.03341513, 0.036520302, -0.039816536, -0.025

In [9]:
# To embed a batch of strings, pass a list of strings as `content`.
batch_texts = [
    "What is the meaning of life?",
    "How much wood would a woodchuck chuck?",
    "How does the brain work?",
]
result = genai.embed_content(
    model="models/embedding-001",
    content=batch_texts,
    task_type="retrieval_document",
    title="Embedding of list of strings",
)

# Print a preview (first 50 characters) of each item in result["embedding"].
for vector in result["embedding"]:
  preview = str(vector)[:50]
  print(preview, "... TRIMMED ...")

0.04703258 ... TRIMMED ...
-0.040190056 ... TRIMMED ...
-0.029026963 ... TRIMMED ...
-0.026809642 ... TRIMMED ...
0.018920582 ... TRIMMED ...
-8.3654784e-05 ... TRIMMED ...
0.031116402 ... TRIMMED ...
-0.019520544 ... TRIMMED ...
0.0114913415 ... TRIMMED ...
0.009625779 ... TRIMMED ...
0.04571186 ... TRIMMED ...
0.05170951 ... TRIMMED ...
-0.007854084 ... TRIMMED ...
-0.07627559 ... TRIMMED ...
-0.00073652336 ... TRIMMED ...
-0.02259244 ... TRIMMED ...
0.01149677 ... TRIMMED ...
-0.00761096 ... TRIMMED ...
0.006400746 ... TRIMMED ...
-0.0036826304 ... TRIMMED ...
-8.6395165e-05 ... TRIMMED ...
0.007910556 ... TRIMMED ...
-0.031401973 ... TRIMMED ...
-0.027668774 ... TRIMMED ...
0.0131483 ... TRIMMED ...
0.005762955 ... TRIMMED ...
-0.0022430476 ... TRIMMED ...
-0.07029421 ... TRIMMED ...
0.007011013 ... TRIMMED ...
0.07013052 ... TRIMMED ...
-0.047634568 ... TRIMMED ...
0.008311825 ... TRIMMED ...
-0.060211696 ... TRIMMED ...
0.016431302 ... TRIMMED ...
0.042709153 ... TRIMMED ...
-0.0