In [1]:
from dotenv import load_dotenv
import os

# Load environment variables from a .env file.
load_dotenv()

# Read PINECONE_API_KEY from the environment.
# os.environ[...] raises KeyError if the variable is not set.
PINECONE_API_KEY = os.environ['PINECONE_API_KEY']

# index 생성

In [3]:
from pinecone import Pinecone
# Create the Pinecone client, passing it the API key read from the environment.
pinecone_client = Pinecone(api_key=PINECONE_API_KEY)

In [5]:
# Bare expression: show the client object's repr as the cell output.
pinecone_client

<pinecone.pinecone.Pinecone at 0x19421012c60>

In [6]:
from pinecone import ServerlessSpec
# Create the serverless index only if it does not exist yet, so this cell can be
# re-run (e.g. Restart Kernel -> Run All) without create_index being called for
# an index name that is already taken.
index_name = 'embedding-3d'
if not pinecone_client.has_index(index_name):
  pinecone_client.create_index(
    name=index_name,
    dimension=3,  # 3-component vectors, matching the sample data used in this notebook
    metric='cosine',
    spec=ServerlessSpec(
      cloud='aws',
      region='us-east-1'
    )
  )

# Last expression: display the index description whether the index was just
# created or already existed.
pinecone_client.describe_index(index_name)

{
    "name": "embedding-3d",
    "metric": "cosine",
    "host": "embedding-3d-506yx3r.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 3,
    "deletion_protection": "disabled",
    "tags": null
}

In [7]:
# Get a handle to the 'embedding-3d' index for data operations.
index = pinecone_client.Index(name='embedding-3d')
# Bare expression: show the handle's repr as the cell output.
index

  from .autonotebook import tqdm as notebook_tqdm


<pinecone.db_data.index.Index at 0x194219f3620>

In [8]:
# Show index statistics (dimension, metric, namespaces, total vector count).
index.describe_index_stats()

{'dimension': 3,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {},
 'total_vector_count': 0,
 'vector_type': 'dense'}

In [11]:
# Iterate over the IDs stored in the 'embedding-3d-ns1' namespace and print
# each item yielded by index.list(); nothing is printed if it yields nothing.
id_batches = index.list(namespace='embedding-3d-ns1')
for ids in id_batches:
  print(ids)

# pinecone data 형식
[{ 'id' : 'vec1' , 'values' :[1.0, 1.5, 2.0] , 'metadata' : {'genre':'drama'}},
{ 'id' : 'vec2' , 'values' :[2.0, 1.5, 0.5] , 'metadata' : {'genre':'action'}},
{ 'id' : 'vec3' , 'values' :[0.1, 0.3, 0.5] , 'metadata' : {'genre':'drama'}},
{ 'id' : 'vec4' , 'values' :[1.0, 2.5, 3.5] , 'metadata' : {'genre':'action'}},
{ 'id' : 'vec5' , 'values' :[3.0, 1.2, 1.3] , 'metadata' : {'genre':'action'}},
{ 'id' : 'vec6' , 'values' :[0.3, 1.1, 2.5] , 'metadata' : {'genre':'drama'}},
]

In [12]:
# Sample records: each has an id, a 3-component vector and a genre tag.
sample_vectors = [
  {'id': 'vec1', 'values': [1.0, 1.5, 2.0], 'metadata': {'genre': 'drama'}},
  {'id': 'vec2', 'values': [2.0, 1.5, 0.5], 'metadata': {'genre': 'action'}},
  {'id': 'vec3', 'values': [0.1, 0.3, 0.5], 'metadata': {'genre': 'drama'}},
  {'id': 'vec4', 'values': [1.0, 2.5, 3.5], 'metadata': {'genre': 'action'}},
  {'id': 'vec5', 'values': [3.0, 1.2, 1.3], 'metadata': {'genre': 'action'}},
  {'id': 'vec6', 'values': [0.3, 1.1, 2.5], 'metadata': {'genre': 'drama'}},
]

# Upsert = update + insert. All records go into the 'embedding-3d-ns1' namespace.
# The call is the last expression, so its result is shown as the cell output.
index.upsert(vectors=sample_vectors, namespace='embedding-3d-ns1')

{'upserted_count': 6}

In [13]:
# Show index statistics again; per-namespace vector counts appear under 'namespaces'.
index.describe_index_stats()

{'dimension': 3,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'embedding-3d-ns1': {'vector_count': 6}},
 'total_vector_count': 6,
 'vector_type': 'dense'}

# id list

In [14]:
# Iterate over the IDs stored in the 'embedding-3d-ns1' namespace and print
# each item yielded by index.list().
id_batches = index.list(namespace='embedding-3d-ns1')
for ids in id_batches:
  print(ids)

['vec1', 'vec2', 'vec3', 'vec4', 'vec5', 'vec6']


# query

In [16]:
# Query vector and metadata filter (only records whose genre equals 'drama').
query_vector = [0.1, 0.4, 0.7]
drama_only = {'genre': {'$eq': 'drama'}}

# Ask for the top 3 matches in the namespace, including each match's stored
# values and metadata in the response.
response = index.query(
  namespace='embedding-3d-ns1',
  vector=query_vector,
  top_k=3,
  include_values=True,
  include_metadata=True,
  filter=drama_only,
)
# Bare expression: display the query response as the cell output.
response

{'matches': [{'id': 'vec3',
              'metadata': {'genre': 'drama'},
              'score': 1.00008953,
              'values': [0.1, 0.3, 0.5]},
             {'id': 'vec6',
              'metadata': {'genre': 'drama'},
              'score': 0.99550879,
              'values': [0.3, 1.1, 2.5]},
             {'id': 'vec1',
              'metadata': {'genre': 'drama'},
              'score': 0.961182,
              'values': [1.0, 1.5, 2.0]}],
 'namespace': 'embedding-3d-ns1',
 'usage': {'read_units': 1}}