<a href="https://colab.research.google.com/github/workszop/notebooks/blob/main/embedding_cohere_example_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Cohere

Cohere offers free trial API keys, which we can use to call their models — in this notebook, the embedding model.

[API key](https://dashboard.cohere.com/api-keys)  
[API documentation](https://docs.cohere.com/docs/cochat-beta)

## Setup

In [None]:
%%capture
!pip install cohere

In [None]:
from getpass import getpass

import cohere
import matplotlib.pyplot as plt
import numpy as np
from sklearn.manifold import TSNE
from sklearn.metrics.pairwise import cosine_similarity

# Ask for the API key with getpass instead of input(): the typed key is then
# not echoed into the cell output, so it cannot leak when the notebook is
# saved or shared. Surrounding whitespace from copy/paste is stripped.
key = getpass('please provide your Cohere API key:\n').strip()

# Client object used by every embedding call further down in the notebook.
co = cohere.Client(key)

# embedding

## simple animals example

In [None]:
# Embed a small list of animal names with the Cohere embedding service.
# Embedding means turning text into numbers which encode its meaning as a
# point in a multi-dimensional space; the number of dimensions depends on the
# model used.
# "embed-english-light-v3.0" generates 384-dimensional embeddings
# (each piece of text is represented by 384 numbers).

animals = [
    "dog", "cat", "hamster", "mouse", "horse", "cow", "pig",
    "giraffe", "elephant", "rhinoceros",
    "wolf", "fox", "shark", "whale", "manta",
]

model_name = "embed-english-light-v3.0"

# One request for the whole list; the response carries one embedding per text.
response = co.embed(texts=animals, model=model_name, input_type="search_document")
embeds = response.embeddings

# Second axis of the array is the embedding dimension reported below.
embeddings = np.array(embeds)
print(f'embedding dimensions: {embeddings.shape[1]}')

In [None]:
# t-SNE projects the embeddings down to a 2-dimensional space so that they
# can be displayed on the screen.
# Please bear in mind that this transformation doesn't preserve all the information.

tsne_params = dict(
    n_components=2,
    perplexity=2,
    init='random',
    learning_rate=200,
    random_state=42,  # fixed seed so the layout is reproducible
)
tsne = TSNE(**tsne_params)
words_embedded = tsne.fit_transform(embeddings)
print(f'dimensions after transformation: {words_embedded.shape[1]}')

In [None]:
# Visualization of the embeddings after dimensionality reduction.
# The picture doesn't show all the meaning encoded by the full embedding.

# Explicit figure/axes objects instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(10, 10))

for i, label in enumerate(animals):
    x, y = words_embedded[i, :]
    # One scatter call per point, so points step through the colour cycle.
    ax.scatter(x, y)
    ax.annotate(label, xy=(x, y), xytext=(5, 5), textcoords='offset points',
                ha='center', va='bottom', fontsize=10)

ax.set_title("word embeddings in 2D")
ax.set_xlabel("t-SNE dimension 1")
ax.set_ylabel("t-SNE dimension 2")

# Render the figure explicitly so the cell does not echo a Text(...) repr.
plt.show()

## various words

In [None]:
# Vocabulary for the second example: a flat list of single English words mixing
# everyday objects (kitchenware, clothing, stationery, tools, furniture,
# electronics, vehicles), a few fruits and a few number words.
# The list is embedded and searched/plotted in the cells below.
words = ['apple', 'pan', 'table', 'fridge', 'soap', 'broom', 'magazine', 'pin',
         'necklace', 'bed', 'shirt', 'stapler', 'newspaper', 'briefcase',
         'camera', 'mop', 'kettle', 'sponge', 'mug', 'ring', 'socks', 'sandals',
         'clock', 'pliers', 'highlighter', 'boots', 'blender',
         'microwave', 'helmet', 'computer', 'mouse', 'headphones',
         'curtain', 'towel', 'pen', 'duster', 'wrench', 'calculator',
         'radio', 'shampoo', 'stove', 'jeans', 'jar', 'glue', 'oven', 'wallet',
         'thesaurus', 'gown', 'diary', 'dictionary', 'dress', 'keyboard',
         'bicycle', 'bucket', 'envelope', 'blanket', 'marker', 'ruler', 'atlas',
         'book', 'truck', 'beanie', 'earrings', 'bottle', 'ten', 'three', 'pencil',
         'frame', 'backpack', 'lamp', 'paper', 'can', 'toothpaste', 'spatula',
         'calendar', 'cup', 'scarf', 'orange', 'monitor', 'hammer', 'hat', 'drill',
         'sneakers', 'tie', 'beret', 'brush', 'swimsuit', 'picture', 'encyclopedia',
         'pants', 'purse', 'stamp', 'binder', 'suitcase', 'motorcycle', 'eraser',
         'shorts', 'chair', 'fork', 'knife', 'mixer', 'dryer', 'mirror', 'clip',
         'one', 'couch', 'saw', 'bolt', 'spoon', 'notebook', 'toaster', 'gloves',
         'nail', 'car', 'scissors', 'razor', 'pillow', 'glasses', 'nut', 'coat',
         'heels', 'crayon', 'shoes', 'skirt', 'sunglasses', 'pot', 'plate', 'bikini',
         'cap', 'television', 'robe', 'washer', 'sweater', 'phone', 'belt',
         'screwdriver', 'watch', 'bowl', 'bracelet', 'tape', 'twenty', 'screw',
         'vacuum', 'peach', 'notepad', 'pajamas', 'carpet', 'toothbrush', 'band',
         'file', 'umbrella', 'box', 'novel', 'printer', 'underwear', 'slippers',
         'pear', 'bra', 'card', 'desk', 'thousand', 'jacket', 'folder', 'bag',
         'dish', 'bus', 'textbook', 'banana', 'keys']

In [None]:
# Embed the `words` list with the Cohere embedding service.
# Embedding means turning text into numbers which encode its meaning as a
# point in a multi-dimensional space; the number of dimensions depends on the
# model used.
# "embed-english-light-v3.0" generates 384-dimensional embeddings
# (each piece of text is represented by 384 numbers).

model_name = "embed-english-light-v3.0"

# One request for the whole list; the response carries one embedding per text.
response = co.embed(texts=words, model=model_name, input_type="search_document")
embeds = response.embeddings

# From here on `embeddings` holds the vectors for `words`.
embeddings = np.array(embeds)

In [None]:
# Embed the search query with the same model that embedded the word list.
# The query is sent with the "search_query" input type, whereas the words
# were embedded as "search_document".
query = "rain"
input_type_query = "search_query"

# Pass the variable defined above rather than repeating the literal, so the
# input type is configured in exactly one place.
query_embed = co.embed(texts=[query],
                       model=model_name,
                       input_type=input_type_query).embeddings

# Expected to be a 2-D array with one row, since a single text was sent.
q_embed = np.array(query_embed)

In [None]:
# Cosine similarity between the query embedding and every word embedding.
cosine_similarity(X=q_embed, Y=embeddings)

In [None]:
# Look up the word whose embedding has the highest cosine similarity to the query.
similarity = cosine_similarity(q_embed, embeddings)
words[similarity.argmax()]

### visualization

In [None]:
# t-SNE projects the embeddings down to a 2-dimensional space so that they
# can be displayed on the screen.
# Please bear in mind that this transformation doesn't preserve all the information.

tsne = TSNE(
    n_components=2,
    perplexity=15,
    init='random',
    learning_rate=200,
    random_state=42,  # fixed seed so the layout is reproducible
)
words_embedded = tsne.fit_transform(embeddings)
print(words_embedded.shape)

In [None]:
# Visualization of the embeddings after dimensionality reduction.
# The picture doesn't show all the meaning encoded by the full embedding.

# Explicit figure/axes objects instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(12, 12))

for i, label in enumerate(words):
    x, y = words_embedded[i, :]
    # One scatter call per point, so points step through the colour cycle.
    ax.scatter(x, y)
    ax.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points',
                ha='right', va='bottom')

ax.set_title("word embeddings in 2D")
ax.set_xlabel("t-SNE dimension 1")
ax.set_ylabel("t-SNE dimension 2")

# Render the figure explicitly so the cell does not echo a Text(...) repr.
plt.show()