In [1]:
pip install torch transformers faiss-cpu numpy

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import torch
from transformers import BertTokenizer, BertModel

# Load pre-trained BERT model & tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

def get_bert_embedding(text):
    tokens = tokenizer(text, return_tensors='pt', padding=True, truncation=True)
    with torch.no_grad():
        output = model(**tokens)
    return output.last_hidden_state.mean(dim=1).numpy()  # Mean pooling for sentence embedding


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [5]:
freelancers = [
    "Python, AI, Machine Learning, TensorFlow",
    "Python, TensorFlow, Deep Learning, Keras",
    "Python, AI Development, TensorFlow, PyTorch",
    "Python, Machine Learning, TensorFlow, Pandas",
    "Python, TensorFlow, NLP, Scikit-learn",
    "Python, AI, TensorFlow, Data Engineering",
    "Python, TensorFlow, Computer Vision, OpenCV",
    "Python, Machine Learning, TensorFlow, Flask",
    "Python, AI, TensorFlow, NumPy, SciPy",
    "Python, TensorFlow, Reinforcement Learning, Gym",
    "Python, AI Development, TensorFlow, AWS",
    "Python, TensorFlow, Time Series Analysis, Statsmodels",
    "Python, Machine Learning, TensorFlow, Jupyter",
    "Python, AI, TensorFlow, Docker, Kubernetes",
    "Python, TensorFlow, Generative AI, GANs"
]

freelancer_embeddings = [get_bert_embedding(profile) for profile in freelancers]


In [6]:
import faiss
import numpy as np

# Convert embeddings to NumPy array
embedding_dim = freelancer_embeddings[0].shape[1]
index = faiss.IndexFlatL2(embedding_dim)  # L2 (Euclidean distance) search
index.add(np.vstack(freelancer_embeddings))  # Add all freelancer embeddings


In [7]:
job_description = "Looking for a Python AI Developer with TensorFlow experience"
job_embedding = get_bert_embedding(job_description)

# Search for the top 3 similar freelancers
k = 3  
distances, indices = index.search(job_embedding, k)

print("Top Freelancer Matches:", indices)


Top Freelancer Matches: [[ 2  1 10]]


In [9]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Convert embeddings to NumPy array
embeddings_array = np.array(freelancer_embeddings)

# Get job description embedding
job_description2 = "Looking for an AI Developer skilled in TensorFlow"
job_embedding2 = get_bert_embedding(job_description2)

# Compute cosine similarity between job and freelancers
similarities = cosine_similarity([job_embedding2], embeddings_array)

# Get top 3 matches
top_indices = np.argsort(similarities[0])[::-1][:3]
print("Top Freelancer Matches:", top_indices)


ValueError: Found array with dim 3. check_pairwise_arrays expected <= 2.

In [10]:
freelancer_embeddings = [get_bert_embedding(profile).squeeze() for profile in freelancers]
job_embedding = get_bert_embedding(job_description).squeeze()


In [11]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

freelancer_embeddings = np.array([get_bert_embedding(profile).squeeze() for profile in freelancers])
job_embedding = get_bert_embedding(job_description).squeeze()

# Compute cosine similarity
similarities = cosine_similarity([job_embedding], freelancer_embeddings)

# Get top 3 matches
top_indices = np.argsort(similarities[0])[::-1][:3]
print("Top Freelancer Matches:", top_indices)


Top Freelancer Matches: [ 2 12  9]
