# Demo Notebook
This notebook provides a simple demonstration of using DeepSeek Chat (Deepseek-v3) API using LangChain

In [1]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

from dotenv import load_dotenv
import os

load_dotenv()
api_key = os.getenv('DEEPSEEK_API_KEY')

### Create Embedding

In [None]:

# Load and split text
loader = TextLoader("data/demo_data.txt")
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = text_splitter.split_documents(documents)

# Generate embeddings & store in ChromaDB
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_db = Chroma.from_documents(docs, embedding_model)


### Retrieve relevant documents

In [4]:
query = "What is the phenomenon that allows a system to exist in multiple configurations?"
retrieved_docs = vector_db.similarity_search(query, k=3)
retrieved_texts = "\n".join([doc.page_content for doc in retrieved_docs])


### Generate Response with LLM

In [5]:
from langchain_deepseek import ChatDeepSeek

llm = ChatDeepSeek(
    model="deepseek-chat",  # or "deepseek-reasoner" for DeepSeek-R1
    temperature=0.7,
    max_tokens=150,
    timeout=30,
    max_retries=2,
    # api_key=api_key,
    # user='vtohal@gmail.com'
)

response = llm.invoke(f"Answer using this context:\n{retrieved_texts}\n\nQuestion: {query}")
print(response.content)

The phenomenon that allows a system to exist in multiple configurations is **superposition**. In quantum mechanics, superposition refers to the state in which a quantum particle or system can represent not just one possibility, but a combination of multiple possibilities simultaneously. This is a fundamental principle that enables quantum computers to explore many potential solutions at once, unlike classical computers, which are deterministic and process one solution at a time.


## Keyword Extraction (Optional)

In [22]:
install_libraries = False # Set to true if required
if install_libraries:
    print('Installing libraries.')
    %pip install spacy yake scikit-learn keybert
    
    import spacy
    spacy.cli.download("en_core_web_sm")
    
    


In [14]:
# Set Job description
use_sample_jd = False
if use_sample_jd:
    job_description = """We are looking for a Machine Learning Engineer with experience in Python, TensorFlow, 
and cloud computing (AWS/GCP). The ideal candidate should have a strong background in deep learning and 
natural language processing (NLP)."""
else:
    job_description = input()

In [None]:
# Extract keywords using LLM
prompt_extract_kw = "Extract keywords related to job description that would help in matching a potential resume to it. Return just a list of comma separated keywords"
llm_keywords_response = llm.invoke(f"{prompt_extract_kw}\n\nJob Description:\n{job_description}")
llm_keywords = [kw.strip() for kw in llm_keywords_response.content.split(',')]
print("LLM extracted keywords:")
print(llm_keywords)


In [23]:
import spacy
nlp = spacy.load("en_core_web_sm")

def extract_keywords_spacy(text):
    doc = nlp(text)
    keywords = set()

    # Extract noun chunks (phrases like "machine learning engineer")
    keywords.update(chunk.text for chunk in doc.noun_chunks)

    # Extract Named Entities (like "Python", "Google Cloud", etc.)
    keywords.update(ent.text for ent in doc.ents)

    return list(keywords)



keywords = extract_keywords_spacy(job_description)
print(keywords)


['the desire', 'excellence', 'Publication record', 'Familiarity', 'Pandas', 'Collaborate', 'client-centric solutions', 'Required Skills / Expertise  We', 'ARIMA', 'software', 'the application', 'machine learning', 'Required Qualifications', 'Required Skills / Expertise', 'patterns', 'a new applied domain', 'peer-reviewed academic conferences', 'Desire', 'ownership', 'Engineering Research', 'a problem', 'relevant journals', 'project experience', 'Key Responsibilities', 'efficiency', 'TensorFlow', '(M.Sc/Ph.D', 'programming language', 'Canada Preferred Qualifications', 'Canada', 'Solid understanding', 'minimum viable products', 'GRUs', 'Design', 'machine intelligence Intellectual curiosity', 'Optimize ML pipelines', 'A positive attitude', 'time series data', 'classical time series models', 'Publication', 'a graduate level program', 'Completion', 'ML/DL', 'scalability', 'regular client meetings', 'Facebook Prophet', 'machine', 'Python', 'a talented and enthusiastic individual', 'ML', 'Opt

In [24]:
from yake import KeywordExtractor

def extract_keywords_yake(text, top_k=20):
    extractor = KeywordExtractor(lan="en", n=3, top=top_k)
    keywords = extractor.extract_keywords(text)
    return [kw[0] for kw in keywords]

keywords = extract_keywords_yake(job_description)
print(keywords)

['Expertise We ’re', 'time series data', 'time series', 'time series models', 'Canada Preferred Qualifications', 'capabilities Required Qualifications', 'series data', 'classical time series', 'experience working', 'time series classification', 'project experience working', 'machine learning Familiarity', 'solid knowledge', 'Required Qualifications', 'Expertise', 'time', 'enthusiastic individual', 'machine learning', 'series', 'skills Interdisciplinary team']


In [25]:
from sklearn.feature_extraction.text import TfidfVectorizer

def extract_keywords_tfidf(text, top_k=10):
    vectorizer = TfidfVectorizer(stop_words="english", max_features=top_k)
    X = vectorizer.fit_transform([text])
    return vectorizer.get_feature_names_out()

keywords = extract_keywords_tfidf(job_description, top_k=20)
print(keywords)

['client' 'data' 'desire' 'engineering' 'experience' 'learn' 'learning'
 'machine' 'ml' 'models' 'new' 'qualifications' 'required' 'series'
 'solid' 'technical' 'techniques' 'time' 'understanding' 'working']


In [18]:
from keybert import KeyBERT

kw_model = KeyBERT()

def extract_keywords_bert(text, top_n=10):
    keywords = kw_model.extract_keywords(text, keyphrase_ngram_range=(1,3), stop_words="english", top_n=top_n)
    return [kw[0] for kw in keywords]

print(extract_keywords_bert(job_description, top_n=20))


['skills expertise looking', 'required skills expertise', 'skills expertise', 'capabilities required qualifications', 'required skills', 'expertise looking talented', 'science ml engineering', 'expertise looking', 'ml engineering', 'ml pipelines', 'learn tensorflow', 'required qualifications', 'expertise', 'time processing capabilities', 'related ml frameworks', 'machine learning experience', 'qualifications', 'skills interdisciplinary team', 'processing capabilities required', 'ml engineering research']
