# Pipeline Testing
This notebook walks through the steps required to convert uploaded files into multimodal (MM) vector embeddings using voyage-multimodal-3.

## Imports
Import the necessary packages and load the required environment variables below, then create references to the databases (MongoDB and Pinecone) and clients for OpenAI and Voyage AI.

In [2]:
import os, PIL
from urllib.parse import quote_plus

import numpy as np
from dotenv import load_dotenv
from openai import OpenAI
from pinecone import Pinecone
from pymongo import MongoClient
from voyageai import Client
from langchain_experimental.text_splitter import SemanticChunker

The imports above are kept in their own cell, separate from the environment variables and client initializations below.

In [None]:
# Load environment variables (load_dotenv reads them from a .env file when present).
load_dotenv()
MONGO_USER = os.getenv("MONGO_USER")
MONGO_PWD = os.getenv("MONGO_PWD")
PINECONE_KEY_SERVERLESS = os.getenv("PINECONE_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_KEY")
VOYAGE_API_KEY = os.getenv("VOYAGE_KEY")

# Fail fast with a readable message: without this check a missing variable would be
# formatted into the URI as the literal text "None" and only surface later as an
# authentication failure.
if not MONGO_USER or not MONGO_PWD:
    raise RuntimeError(
        "MONGO_USER and MONGO_PWD must be set in the environment or the .env file."
    )

# The username and password must be percent-escaped before being embedded in a
# MongoDB URI; otherwise characters such as '@', ':', '/' or '%' break URI parsing.
uri = (
    f"mongodb+srv://{quote_plus(MONGO_USER)}:{quote_plus(MONGO_PWD)}"
    "@passionaibot.4dwr2me.mongodb.net/?retryWrites=true&w=majority&appName=PassionAIBot"
)
# Create a new client and connect to the server
mongo_client = MongoClient(uri)

# Send a ping to confirm a successful connection.
# Best effort: a failed ping is reported but does not stop the rest of the cell.
try:
    mongo_client.admin.command('ping')
    print("Successfully pinged MongoDB deployment.")
except Exception as e:
    print(e)

# connect to API database
_db = mongo_client['PassionAIDB_API']

# create references to the individual collections
_tests = _db['tests']
_users = _db['users']
_groups = _db['groups']
_api_users = _db['api_users']
_access_tokens = _db['access_tokens']

# create connection to pinecone database
# load pinecone instance
pinecone_client = Pinecone(api_key=PINECONE_KEY_SERVERLESS)
# get correct 'collection' (a Pinecone index)
_pai_index = pinecone_client.Index("passion-ai-db-serverless")

# create OpenAI client
_openai_client = OpenAI(api_key=OPENAI_API_KEY)

# get voyage AI client
_voyage_client = Client(api_key=VOYAGE_API_KEY)

# create semantic chunker instance
# NOTE(review): SemanticChunker's constructor appears to require an `embeddings`
# argument; calling it with no arguments will presumably raise TypeError. Confirm
# against the installed langchain_experimental version and pass an embeddings object.
_text_splitter = SemanticChunker()

## Processing Files
Take the uploaded files and convert them to a screenshot format.
The pipeline needs to handle the following conversions:
- Text into screenshots
- Images into screenshots without too much data
- Audio should be transcribed and chunked; then embed the text and/or audio chunks
- Video should have key frames sampled and its audio transcribed

### Processing Text
I need to find a more effective way to chunk text documents. As a first test, I will run Voyage AI's multimodal model (voyage-multimodal-3) on some text documents.

In [None]:
# Example inputs: two related text passages (kitten / cat) and one image URL.
# Each request is a list of inputs, and each input carries a "content" list of
# typed items ("text" or "image_url").
kitten_passage = "A kitten is a juvenile cat. After being born, kittens display primary altriciality and are fully dependent on their mothers for survival. They normally do not open their eyes for seven to ten days. After about two weeks, kittens develop quickly and begin to explore the world outside their nest. After a further three to four weeks, they begin to eat solid food and grow baby teeth. Domestic kittens are highly social animals and usually enjoy human companionship."
cat_passage = "The cat (Felis catus), also referred to as the domestic cat or house cat, is a small domesticated carnivorous mammal. It is the only domesticated species of the family Felidae. Advances in archaeology and genetics have shown that the domestication of the cat occurred in the Near East around 7500 BC. It is commonly kept as a pet and farm cat, but also ranges freely as a feral cat avoiding human contact. It is valued by humans for companionship and its ability to kill vermin. Its retractable claws are adapted to killing small prey species such as mice and rats. It has a strong, flexible body, quick reflexes, and sharp teeth, and its night vision and sense of smell are well developed. It is a social species, but a solitary hunter and a crepuscular predator."

inputs = [{"content": [{"type": "text", "text": kitten_passage}]}]
inputs_2 = [{"content": [{"type": "text", "text": cat_passage}]}]

image_input = [
    {
        "content": [
            {"type": "image_url", "image_url": "https://d2zp5xs5cp8zlg.cloudfront.net/image-86754-800.jpg"}
        ]
    }
]

# Vectorize the inputs, one request per input so the three results stay separate.
embedding_model = "voyage-multimodal-3"
text_res = _voyage_client.multimodal_embed(inputs, model=embedding_model)
text_res_2 = _voyage_client.multimodal_embed(inputs_2, model=embedding_model)
image_res = _voyage_client.multimodal_embed(image_input, model=embedding_model)

In [70]:
def get_data(voyager_output):
    """Print the embeddings and usage counters of an embedding result.

    Args:
        voyager_output: Object exposing the attributes ``embeddings``,
            ``image_pixels``, ``text_tokens`` and ``total_tokens``.

    Returns:
        None. The summary is written to stdout as four lines.
    """
    summary = f"Embeddings: {voyager_output.embeddings}\nImage pixels: {voyager_output.image_pixels}\nText tokens: {voyager_output.text_tokens}\nTotal tokens: {voyager_output.total_tokens}"
    print(summary)

# Show the embeddings and usage counters returned for the first text input.
get_data(voyager_output=text_res)

def cosine(vec1, vec2):
    """Return the cosine similarity of two 1-D vectors.

    The dot product is divided by the product of the vectors' Euclidean norms,
    so the result does not depend on the vectors' lengths. For unit-length
    inputs this equals the plain dot product.

    Args:
        vec1: Sequence or array of numbers.
        vec2: Sequence or array of numbers with the same length as ``vec1``.

    Returns:
        The cosine similarity in [-1, 1], or 0.0 if either vector has zero
        length (the similarity is undefined there, and 0.0 avoids a NaN).
    """
    vec1 = np.asarray(vec1, dtype=float)
    vec2 = np.asarray(vec2, dtype=float)
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if norm_product == 0:
        return 0.0
    return np.dot(vec1, vec2) / norm_product

# Compare every pair of results: text vs. text, then each text against the image.
result_pairs = (
    (text_res, text_res_2),
    (text_res, image_res),
    (text_res_2, image_res),
)
for first_res, second_res in result_pairs:
    print(cosine(first_res.embeddings[0], second_res.embeddings[0]))
    

Embeddings: [[0.0322265625, -0.04638671875, -0.0145263671875, -0.0196533203125, -0.01953125, -0.0166015625, -0.00732421875, -0.0086669921875, -0.01611328125, -0.0634765625, -0.0262451171875, 0.03173828125, -0.031494140625, -0.00555419921875, 0.0125732421875, 0.0791015625, -0.046142578125, 0.024658203125, 0.050537109375, 0.024169921875, -0.00396728515625, -0.0023040771484375, -0.064453125, 0.03515625, 0.017822265625, -0.03466796875, -0.03271484375, 0.0439453125, 0.013427734375, 0.0252685546875, -0.00127410888671875, 0.052734375, -0.0107421875, -0.012451171875, -0.0184326171875, 0.00665283203125, -0.06298828125, 0.01104736328125, -0.0277099609375, -0.057373046875, 0.00018405914306640625, 0.038330078125, 0.018310546875, -0.07763671875, -0.003021240234375, -0.03173828125, 0.04638671875, -0.032470703125, -0.045654296875, 0.03759765625, 0.053955078125, -0.03662109375, 0.006591796875, -0.0208740234375, -0.00640869140625, 0.0023040771484375, -0.03662109375, 0.02734375, 0.024169921875, 0.028442