In [3]:
!pip install langchain langchain-community sentence-transformers pinecone-client python-dotenv typing pydantic



In [4]:
!pip install -q sentence-transformers transformers torch

In [5]:
# Install required packages
!pip install -q langchain-cohere cohere langchain pinecone-client

In [6]:
from langchain import LLMChain, PromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.vectorstores import Pinecone as LangchainPinecone
from pinecone import Pinecone, ServerlessSpec
from typing import List, Dict
import json
import os

In [7]:
# Set API keys
HUGGINGFACE_TOKEN = "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"
PINECONE_API_KEY = "pcsk_44R4w_9mXN3CD1j1kqzSjUzzwYkDJePke4GioyChxpYWmK48Ro369U4vGmEKd5rtpFkLi"

# Set environment variables
os.environ["HUGGINGFACE_API_TOKEN"] = HUGGINGFACE_TOKEN
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACE_TOKEN
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
os.environ["PINECONE_ENVIRONMENT"] = "gcp-starter"

In [8]:
!pip install --upgrade pinecone-client



In [16]:
# Initialize HuggingFaceEmbeddings
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs={"token": HUGGINGFACE_TOKEN}
)

# Initialize Pinecone
pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "cv-db"

# Initialize Pinecone vector store with LangChain
vectorstore = LangchainPinecone.from_existing_index(
    index_name=index_name,
    embedding=embedding_model,
)

# Initialize LLM with specific task
llm = HuggingFaceHub(
    repo_id="google/flan-t5-large",
    huggingfacehub_api_token=HUGGINGFACE_TOKEN,
    task="text2text-generation",
    model_kwargs={
        "temperature": 0.7,
        "max_length": 512
    }
)

In [25]:
from sentence_transformers import SentenceTransformer

index = pc.Index("cv-db")

def get_embedding(text):
    """
    Function to get the embeddings of a text
    :params text: the text to encode
    :return: the embeddings
    """
    model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

    embeddings = model.encode([text])
    return embeddings[0]

In [28]:
from langchain.vectorstores import Pinecone as LangchainPinecone
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from pinecone import Pinecone
from typing import Dict

# Initialize HuggingFace Embeddings
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs={"token": HUGGINGFACE_TOKEN}
)

# Initialize Pinecone
pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "cv-db"

# Initialize Pinecone vector store with LangChain
vectorstore = LangchainPinecone.from_existing_index(
    index_name=index_name,
    embedding=embedding_model,
    text_key="page_content"
)

# Initialize LLM
llm = HuggingFaceHub(
    repo_id="google/flan-t5-large",
    huggingfacehub_api_token=HUGGINGFACE_TOKEN,
    task="text2text-generation",
    model_kwargs={"temperature": 0.7, "max_length": 512}
)

# Define prompt template
resume_query_template = """
Given the following job requirements:
{job_requirements}

And the candidate information:
{candidate_info}

Please analyze and provide:
1. Match percentage
2. Key qualifications that align with requirements
3. Any gaps in qualifications
4. Overall recommendation

Response:"""

prompt = PromptTemplate(
    template=resume_query_template,
    input_variables=["job_requirements", "candidate_info"]
)

class ResumeSearchBot:
    def __init__(self):
        self.llm_chain = LLMChain(llm=llm, prompt=prompt)
        self.vectorstore = vectorstore

    def search_candidates(self, query, top_k=5):
        """Searches the Pinecone index for the most relevant candidates."""
        query_embed = get_embedding(query).tolist()  # Convert NumPy array to list

        results = index.query(
            vector=query_embed,
            top_k=top_k,
            include_metadata=True
        )

        # Extract candidate information
        candidates = []
        print(results)  # Debugging output to inspect results

        for match in results["matches"]:  # Correctly loop through retrieved matches
            metadata = match["metadata"]  # Extract metadata
            candidate_info = f"Content: {metadata.get('content', 'N/A')}\n" \
                            f"Original File: {metadata.get('original_file', 'N/A')}\n" \
                            f"Score: {match.get('score', 'N/A')}"  # Include similarity score

            candidates.append(candidate_info)

        return candidates


    def add_resume(self, resume_text: str, metadata: Dict = None):
        """Add a resume to the vector store"""
        try:
            # Add text to vector store
            ids = self.vectorstore.add_texts(
                texts=[resume_text],
                metadatas=[metadata] if metadata else None
            )
            return ids[0]  # Return the first ID
        except Exception as e:
            print(f"Error adding resume: {str(e)}")
            return None

def test_bot():
    bot = ResumeSearchBot()

    print("\nTesting search functionality...")
    query = "Looking for a developer with Docker, AWS knowledge"
    results = bot.search_candidates(query)

    print("\nSearch Results:")
    for idx, candidate in enumerate(results, 1):
        print(f"\nCandidate {idx}:")
        print(candidate)  # Correctly displaying retrieved candidate info

if __name__ == "__main__":
    test_bot()



Testing search functionality...
{'matches': [{'id': 'extraction_summary.csv_chunk_131',
              'metadata': {'content': '. Skilled in managing real-time data '
                                      'processing with Apache Kafka, '
                                      'containerization with Docker, and '
                                      'version control with Git. Qualified in '
                                      'Kubernetes for container orchestration '
                                      'and Tableau for data visualization. '
                                      'Adept at working with databases, '
                                      'including MongoDB and PostgreSQL, and '
                                      'experienced in deploying applications '
                                      'on Linux environments. Proven ability '
                                      'to integrate workflow automation using '
                                      'Camunda and dynamic 

## API Endpoints

In [None]:
from fastapi import FastAPI

app = FastAPI()
bot = ResumeSearchBot()

@app.post("/search_candidates")
async def search_candidates(query: str):
    return bot.search_candidates(query)

@app.get("/candidate/{candidate_id}")
async def get_candidate(candidate_id: str):
    return bot.get_candidate_details(candidate_id)