In [1]:
!pip install -U pinecone-client sentence-transformers

Collecting pinecone-client
  Downloading pinecone_client-2.2.4-py3-none-any.whl (179 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 179.4/179.4 kB 3.0 MB/s eta 0:00:00
Collecting sentence-transformers
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 86.0/86.0 kB 5.7 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... done
Collecting loguru>=0.5.0 (from pinecone-client)
  Downloading loguru-0.7.2-py3-none-any.whl (62 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 62.5/62.5 kB 6.0 MB/s eta 0:00:00
Collecting dnspython>=2.0.0 (from pinecone-client)
  Downloading dnspython-2.4.2-py3-none-any.whl (300 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 300.4/300.4 kB 10.6 MB/s eta 0:00:00
Collecting sentencepiece (from sentence-transformers)
  Download

In [2]:
import os
import pinecone

# Credentials are read from the environment so that no secret is ever stored
# in (or committed with) the notebook itself.
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
if not PINECONE_API_KEY:
    raise RuntimeError(
        'PINECONE_API_KEY is not set. Export it in the environment '
        'before starting the notebook kernel.'
    )

# The Pinecone environment can be overridden; it defaults to the value
# this notebook was originally written with.
PINECONE_ENV = os.environ.get('PINECONE_ENV', 'gcp-starter')

# Configure the module-level Pinecone client used by the cells below.
pinecone.init(
    api_key=PINECONE_API_KEY,
    environment=PINECONE_ENV
)

  from tqdm.autonotebook import tqdm


In [3]:
from sentence_transformers import SentenceTransformer

import torch

# Use the GPU when one is available and fall back to the CPU otherwise,
# so the notebook also runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the sentence-embedding model onto the selected device.
model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
model

.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
)

In [4]:
# Sanity check: encode a single sentence and display the shape of the
# resulting embedding.
sample = 'whatwant is good!'

encoded_sample = model.encode(sample)
encoded_sample.shape

(384,)

In [5]:
# Name of the Pinecone index used throughout this notebook.
index_name = 'sample-search'

# Create the index only if it is not already present; its dimension is
# taken from the embedding model so that the two always agree.
index_exists = index_name in pinecone.list_indexes()
if not index_exists:
    index_settings = {
        'name': index_name,
        'dimension': model.get_sentence_embedding_dimension(),
        'metric': 'cosine',
    }
    pinecone.create_index(**index_settings)

# Handle used by the later cells to write to and query the index.
index = pinecone.Index(index_name)

In [6]:
# Demo corpus: one short description per programming language.
# Each string is embedded and stored in the index as its own record.
data = [
    "Python is admired for its easy-to-learn syntax and versatility, making it a popular choice for applications in web development, data science, and artificial intelligence.",
    "JavaScript plays a crucial role in making web pages interactive and dynamic, and is widely used in both front-end and back-end development across the web industry.",
    "Java offers strong memory management and platform-independent capabilities, making it a preferred language for enterprise-level applications and Android app development.",
    "C++ is known for its high performance and direct memory control, which makes it particularly suited for game development and systems programming.",
    "C#, built on the .NET framework, combines object-oriented principles with stability and efficiency, commonly used in Windows applications and game development.",
    "Ruby, with its high readability and the powerful Rails framework, is often the go-to language for rapid web application development and scripting tasks.",
    "PHP is a server-side scripting language that is easy to pick up and widely used for web development, especially in content management systems.",
    "Swift is tailored for Apple's ecosystem, offering safety and speed, and is predominantly used in iOS and macOS app development.",
    "Go, or Golang, designed by Google, is notable for its simplicity and efficiency, especially in handling concurrent tasks, making it suitable for cloud and network programming.",
    "Rust is focused on memory safety and performance, often chosen for system programming, embedded systems, and applications where high performance is critical."
]

In [7]:
# One string id per document, derived from its position in `data`.
ids = [str(position) for position in range(len(data))]
# Embed every document and convert the result to plain Python lists.
embedded_datas = model.encode(data).tolist()
# Keep the original text as metadata so query results can show it.
meta_datas = [{'text': text} for text in data]

# Materialise the (id, vector, metadata) tuples as a list: a bare `zip` is a
# single-use iterator, so re-running the upsert cell would otherwise send
# nothing the second time.
records = list(zip(ids, embedded_datas, meta_datas))

In [8]:
# Write the (id, vector, metadata) records to the index.
index.upsert(vectors=records)

{'upserted_count': 10}

In [9]:
# Display the index statistics to confirm the records were stored.
index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.0001,
 'namespaces': {'': {'vector_count': 10}},
 'total_vector_count': 10}

In [10]:
def request_query(query: str, top_k: int = 2) -> list:
    """Search the index for the stored texts most similar to ``query``.

    The query is embedded with the module-level ``model`` and looked up in
    the module-level ``index``.

    Args:
        query: Natural-language query text.
        top_k: Number of best matches to request from the index (default 2).

    Returns:
        A list of ``{'score': ..., 'text': ...}`` dicts, one per match, in
        the order returned by the index.
    """
    embedded_query = model.encode(query).tolist()
    # Pass the vector by keyword so the call does not depend on the
    # positional order of `Index.query` parameters.
    results = index.query(
        vector=embedded_query,
        top_k=top_k,
        include_metadata=True,
    )

    # Keep only the similarity score and the stored text of each match.
    return [
        {'score': match['score'], 'text': match['metadata']['text']}
        for match in results['matches']
    ]

In [11]:
# Run a sample natural-language query against the index.
request_query("What is the Python?")

[{'score': 0.704776,
  'text': 'Python is admired for its easy-to-learn syntax and versatility, making it a popular choice for applications in web development, data science, and artificial intelligence.'},
 {'score': 0.336612493,
  'text': 'Ruby, with its high readability and the powerful Rails framework, is often the go-to language for rapid web application development and scripting tasks.'}]

In [12]:
 request_query("Explain the JavaScript")

[{'score': 0.625493288,
  'text': 'JavaScript plays a crucial role in making web pages interactive and dynamic, and is widely used in both front-end and back-end development across the web industry.'},
 {'score': 0.30379954,
  'text': 'PHP is a server-side scripting language that is easy to pick up and widely used for web development, especially in content management systems.'}]