In [14]:
from dotenv import load_dotenv
load_dotenv()
import os
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec

In [5]:
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
pc.create_index(
    name="rag", dimension=1536, metric="cosine", spec=ServerlessSpec(cloud="aws", region="us-east-1")
)

In [12]:
import json
data = json.load(open("reviews.json"))
data['reviews']

[{'professor': 'Dr. Emily Thompson',
  'subject': 'Mathematics',
  'stars': 4,
  'review': 'Dr. Thompson explains complex concepts clearly. Her enthusiasm for math is contagious!'},
 {'professor': 'Prof. Michael Chen',
  'subject': 'Computer Science',
  'stars': 5,
  'review': "Brilliant instructor. Prof. Chen's projects are challenging but incredibly rewarding."},
 {'professor': 'Dr. Sarah Johnson',
  'subject': 'Biology',
  'stars': 3,
  'review': "Lectures can be dry, but Dr. Johnson's lab sessions are engaging and informative."},
 {'professor': 'Prof. David Martinez',
  'subject': 'History',
  'stars': 4,
  'review': 'Prof. Martinez brings history to life with fascinating anecdotes and discussions.'},
 {'professor': 'Dr. Rachel Lee',
  'subject': 'Psychology',
  'stars': 5,
  'review': "Exceptional teacher. Dr. Lee's classes are thought-provoking and well-structured."},
 {'professor': 'Prof. Robert Wilson',
  'subject': 'Physics',
  'stars': 2,
  'review': 'Brilliant mind, but Prof

In [15]:
processed_data = []
client = OpenAI()

#Creating Embeddings
for review in data['reviews']:
    response = client.embeddings.create(
        input=review['review'],
        model="text-embedding-3-small",              
    )
    embedding = response.data[0].embedding
    processed_data.append({
        "values": embedding,
        "id": review["professor"],
        "metadata": {
            "review": review['review'],
            "subject": review['subject'],
            "stars": review["stars"]
                              
        }
    })

In [16]:
processed_data[0]

{'values': [0.018163502,
  -0.036459487,
  -0.036221016,
  0.049177915,
  0.030630209,
  0.018070763,
  0.023184631,
  0.035664584,
  -0.011433335,
  -0.0135332,
  0.018123757,
  0.010777541,
  -0.0021147695,
  -0.03508166,
  0.030550718,
  0.04178533,
  0.0009133088,
  -0.026841179,
  0.02529112,
  0.035187647,
  0.026973661,
  -0.022482468,
  0.022999153,
  -0.0020849607,
  -0.029914798,
  -0.05834578,
  0.024906917,
  0.030736197,
  -0.002096553,
  -0.035929553,
  0.06539391,
  -0.019846044,
  0.008922771,
  -0.029623333,
  -0.010512574,
  0.034551725,
  -0.043984555,
  0.02044222,
  0.007836405,
  -0.0035406244,
  0.012003015,
  0.032908928,
  -0.027238628,
  0.033253383,
  0.002798716,
  -0.005521254,
  -0.041308388,
  -0.024681695,
  0.02472144,
  0.034101278,
  -0.013579569,
  0.015235614,
  -0.0035737453,
  -0.0014854724,
  -0.086167336,
  0.04621028,
  0.022932911,
  0.028166015,
  -0.0037294135,
  -0.074243814,
  0.043666594,
  -0.022932911,
  -0.0019458529,
  0.019263117,
  

In [17]:
index = pc.Index('rag')
index.upsert(
    vectors=processed_data,
    namespace="ns1"
)

{'upserted_count': 20}

In [18]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}