# 1. Download the knowledgebase

In [None]:
from datasets import load_dataset

ds = load_dataset("jkhedri/psychology-dataset")

ds["train"].to_csv("train.csv")

# 2. Load the knowledge base

In [None]:
import pandas as pd

df = pd.read_csv("train.csv")
df_records = df.to_dict(orient="records")

# 3. Import the knowledge base to the database

In [None]:
from services import knowledge_base_services
from services import llm_services
for record in df_records[:5]:
    knowledge_base_services.create(
        record["question"],
        [record["response_j"], record["response_k"]],
        llm_services.get_word_embeddings(record["question"]),
    )

In [None]:
len(llm_services.get_word_embeddings("What is the meaning of life?"))

# 4. Create Vector Index on MongoDB Compass

# 5. Try retrieving vector

In [19]:
from services import knowledge_base_services
from services import llm_services

collection = knowledge_base_services.collection

# define pipeline
pipeline = [
  {
    '$vectorSearch': {
      'index': 'default', 
      'path': 'vector',
      'queryVector': llm_services.get_word_embeddings("I'm feeling really anxious lately and I don't know why."),
      'numCandidates': 150, 
      'limit': 10
    }
  }, 
  {
    '$project': { 
    'sampleQuestion':1,
    'sampleAnswers':1,
      'score': {
        '$meta': 'vectorSearchScore'
      }
    }
  }
]

# run pipeline
result = collection.aggregate(pipeline)

# print results
for i in result:
    print(i)
 

{'_id': ObjectId('6725f346165db9697d6dcb03'), 'sampleQuestion': "I'm feeling really anxious lately and I don't know why.", 'sampleAnswers': ["It's common to feel anxious at times, and there can be many reasons for it. Have there been any recent changes or stressors in your life that may be contributing to your anxiety? Let's work together to identify any triggers and develop coping strategies to manage your anxiety.", 'Anxiety is just a sign of weakness. You need to toughen up and stop worrying so much. Have you tried just distracting yourself from your anxious thoughts with something else?'], 'score': 1.0}
{'_id': ObjectId('6725f34a165db9697d6dcb07'), 'sampleQuestion': "I've been feeling really anxious lately, and I don't know why.", 'sampleAnswers': ["It's common to feel anxious without knowing the exact cause. Would you like to explore some possible triggers or coping mechanisms that could help you manage your anxiety?", "Just try to relax and stop thinking about it. Anxiety is all 

# 6. Insert all knowledge base to the database

In [2]:
import pandas as pd
from utils import helpers
from services import knowledge_base_services

df_records = pd.read_csv("train.csv").to_dict(orient="records")
data = [
    {
        "question": record["question"],
        "answers": [record["response_j"], record["response_k"]],
    }
    for record in df_records
]

helpers.parallelize(data, knowledge_base_services.create)

  0%|          | 0/9846 [00:00<?, ?it/s]2024-11-02 18:53:24,614 - my_logger - INFO - Inserted new question-answer pair with id: 672612b4b14f417ca9c2f385
  0%|          | 16/9846 [00:01<14:51, 11.02it/s]2024-11-02 18:53:24,637 - my_logger - INFO - Inserted new question-answer pair with id: 672612b4b14f417ca9c2f386
2024-11-02 18:53:24,710 - my_logger - INFO - Inserted new question-answer pair with id: 672612b4b14f417ca9c2f387
2024-11-02 18:53:24,783 - my_logger - INFO - Inserted new question-answer pair with id: 672612b4b14f417ca9c2f388
2024-11-02 18:53:25,005 - my_logger - INFO - Inserted new question-answer pair with id: 672612b4b14f417ca9c2f389
2024-11-02 18:53:25,139 - my_logger - INFO - Inserted new question-answer pair with id: 672612b5b14f417ca9c2f38a
2024-11-02 18:53:25,170 - my_logger - INFO - Inserted new question-answer pair with id: 672612b5b14f417ca9c2f38b
2024-11-02 18:53:25,223 - my_logger - INFO - Inserted new question-answer pair with id: 672612b5b14f417ca9c2f38c
2024-11

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,

# 7. Test the retriever

In [9]:
from services import knowledge_base_services

knowledge_base_services.retrive_by_question("hi, I feel anxious", 0.7)

[{'_id': ObjectId('67261530b14f417ca9c2ff47'),
  'sampleQuestion': "I'm feeling really anxious lately, I don't know what to do.",
  'sampleAnswers': ["It sounds like you're experiencing a lot of stress. Let's talk about some coping mechanisms you can use to manage your anxiety. Have you tried deep breathing or mindfulness exercises?",
   'Anxiety is all in your head. You just need to stop worrying so much and toughen up.'],
  'score': 0.836165189743042},
 {'_id': ObjectId('67261a24b14f417ca9c31466'),
  'sampleQuestion': "I'm feeling really anxious lately. I don't know what to do.",
  'sampleAnswers': ["It's understandable to feel anxious in uncertain times. Let's work on some relaxation techniques and coping strategies to manage your anxiety.",
   "Just try to distract yourself and think positive thoughts. You'll be fine."],
  'score': 0.830414891242981},
 {'_id': ObjectId('672613a4b14f417ca9c2f813'),
  'sampleQuestion': "I'm feeling really anxious lately, I don't know how to calm down