In [1]:
# Mount Google Drive at /content/drive (Colab-only); later cells read the brand JSON from there.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
!pip install sentence-transformers faiss-cpu pandas numpy
import pandas as pd
import numpy as np
import json
from sentence_transformers import SentenceTransformer
import faiss


Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0.post1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-

In [4]:
# Load the brand records from the mounted Google Drive folder.
# The encoding is given explicitly so the result does not depend on the platform's default codec.
with open('/content/drive/MyDrive/Colab Notebooks/indian_synthetic_brands.json', 'r', encoding='utf-8') as f:
    brands_data = json.load(f)

# One row per brand record; preview the three plain-text columns.
brands_df = pd.DataFrame(brands_data)
print(brands_df[['title', 'description', 'category']].head())

                            title                                description  \
0                  Sanchez-Taylor        Customer-focused systematic support   
1  Gardner, Robinson and Lawrence          Adaptive well-modulated workforce   
2   Stevens, Martinez and Nielsen                 Configurable neutral frame   
3                    Martin-Kelly  Synchronized encompassing standardization   
4                       Cohen Inc            Focused 5thgeneration workforce   

        category  
0         Beauty  
1           Tech  
2  Entertainment  
3         Travel  
4         Beauty  


In [5]:
def brand_text(row):
    """
    Build the text that is embedded for one brand.

    Parameters:
        row: mapping-like brand record exposing ``.get`` (a dict or a DataFrame row).

    Returns:
        str: the non-empty fields joined with ' | ', in the order title, description,
        category, platforms, budget, audience interests, audience locations,
        metadata language, metadata caption.

    Missing values (None / NaN) and nested fields of an unexpected type are skipped
    instead of raising, so sparse records can still be embedded.
    """
    def _missing(value):
        # NaN is the only float that differs from itself; pandas uses it for absent cells.
        return value is None or (isinstance(value, float) and value != value)

    def _as_dict(value):
        return value if isinstance(value, dict) else {}

    def _as_items(value):
        # A bare string is a single item; joining it directly would split it into characters.
        if isinstance(value, str):
            return [value]
        if isinstance(value, (list, tuple)):
            return [item for item in value if not _missing(item)]
        return []

    audience = _as_dict(row.get('targetAudience', {}))
    metadata = _as_dict(row.get('metadata', {}))
    budget = row.get('budget', '')

    fields = [
        row.get('title', ''),
        row.get('description', ''),
        row.get('category', ''),
        ' '.join(map(str, _as_items(row.get('platforms', [])))),
        '' if _missing(budget) else str(budget),
        ' '.join(map(str, _as_items(audience.get('interests', [])))),
        ','.join(map(str, _as_items(audience.get('locations', [])))),
        metadata.get('language', ''),
        metadata.get('caption', ''),
    ]
    return ' | '.join(str(f) for f in fields if f and not _missing(f))

# Build one embedding-input string per brand; axis=1 hands each row to brand_text.
brands_df['embedding_text'] = brands_df.apply(brand_text, axis=1)


In [6]:
# Load the sentence-transformers model by name and encode every brand's embedding_text.
model = SentenceTransformer('all-MiniLM-L6-v2')
brand_embeddings = model.encode(brands_df['embedding_text'].tolist(), show_progress_bar=True)
# The second axis of the encoded array is the vector size the index must be created with.
embedding_dim = brand_embeddings.shape[1]
# Flat L2-distance index over the brand vectors (added as float32).
# NOTE(review): later cells rank by cosine similarity instead; the two orderings only
# coincide if the vectors are unit-length — confirm for this model.
brand_index = faiss.IndexFlatL2(embedding_dim)
brand_index.add(brand_embeddings.astype('float32'))


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/16 [00:00<?, ?it/s]

In [7]:
def influencer_text(influencer):
    """
    Flatten an influencer profile into a single ' | '-separated string.

    The six descriptive keys are used as given; the two numeric keys are converted
    to text first (so a value of 0 is kept). Empty or absent entries are left out.
    Edit the key names to match your influencer data structure.
    """
    descriptive_keys = ('bio', 'categories', 'platform', 'location', 'language', 'interests')
    numeric_keys = ('follower_count', 'engagement_rate')

    parts = [influencer.get(key, '') for key in descriptive_keys]
    parts += [str(influencer.get(key, '')) for key in numeric_keys]
    return ' | '.join(str(part) for part in parts if part)

def recommend_brands_for_influencer(influencer, top_k=5):
    """
    Return the brands nearest to an influencer profile in the FAISS index.

    Uses the notebook-level ``model``, ``brand_index`` and ``brands_df``.

    Parameters:
        influencer: dict profile, turned into text by ``influencer_text``.
        top_k: maximum number of brands to return.

    Returns:
        DataFrame of at most ``top_k`` rows (nearest first), limited to the display columns.
    """
    display_cols = ['title', 'description', 'category', 'platforms', 'budget', 'targetAudience', 'requirements', 'metadata']
    query_text = influencer_text(influencer)
    query_embedding = model.encode([query_text])

    # Never ask for more neighbours than the index holds: FAISS fills the surplus
    # slots with -1, which iloc would interpret as "last row".
    k = max(0, min(int(top_k), int(brand_index.ntotal)))
    if k == 0:
        return brands_df.iloc[[]][display_cols]

    _, neighbor_ids = brand_index.search(query_embedding.astype('float32'), k)
    # Defensive: drop any remaining -1 padding before positional lookup.
    valid_ids = [int(i) for i in neighbor_ids[0] if i >= 0]
    return brands_df.iloc[valid_ids][display_cols]


In [8]:
# Dummy influencer example (replace with real data from your influencers)
# The keys below are the ones influencer_text() reads.
influencer_dict = {
    'bio': 'Comedy content creator, loves gadgets and tech.',
    'categories': 'Comedy, Gadgets',
    'platform': 'tiktok',
    'location': 'India',
    'language': 'Hindi',
    'interests': 'Comedy, Gadgets, Tech',
    'follower_count': 15000,
    'engagement_rate': 6
}

# Query the FAISS index for the 5 nearest brands and print the selected columns.
result = recommend_brands_for_influencer(influencer_dict, top_k=5)
print(result)


                         title                                    description  \
317  Carroll, Choi and Collier  Ergonomic full-range Graphical User Interface   
193   Camacho, Wells and Keith         Phased exuding artificial intelligence   
354               Barnes-Grant                Ergonomic bottom-line moderator   
367  Buchanan, Tucker and Wade                   Progressive neutral intranet   
223                   Love Inc                  Versatile intangible analyzer   

    category                     platforms  \
317     Tech  [tiktok, instagram, youtube]   
193   Gaming  [tiktok, youtube, instagram]   
354  Fitness             [tiktok, youtube]   
367     Tech  [instagram, youtube, tiktok]   
223   Gaming             [tiktok, youtube]   

                                             budget  \
317  {'min': 3116, 'max': 34589, 'currency': 'INR'}   
193  {'min': 3307, 'max': 34719, 'currency': 'INR'}   
354  {'min': 3717, 'max': 40267, 'currency': 'INR'}   
367  {'min': 187

In [9]:
import numpy as np

def get_preference_vector(interacted_indices, brand_embeddings):
    """
    Computes the average embedding of all brands previously engaged with.

    Parameters:
        interacted_indices: row positions into ``brand_embeddings``; may be None,
            a single int, or any array-like (list, tuple, NumPy array).
        brand_embeddings: 2-D array-like with one embedding per row.

    Returns:
        1-D array (mean over the selected rows), or None if there are no interactions.
    """
    if interacted_indices is None:
        return None
    # Convert first: truth-testing a NumPy array raises, and a tuple would otherwise
    # be interpreted as a multi-axis index rather than a list of rows.
    indices = np.atleast_1d(np.asarray(interacted_indices))
    if indices.size == 0:
        return None
    return np.mean(np.asarray(brand_embeddings)[indices], axis=0)

def search_brands(
        query,
        brand_embeddings,
        brands_df,
        model,
        interacted_indices=None,
        top_k=5,
        alpha=0.6
    ):
    """
    Rank brands against a free-text query, optionally nudged by interaction history.

    Parameters:
        query: string (the influencer's search intent or profile text)
        brand_embeddings: 2-D numpy array, one embedding per brand row
        brands_df: DataFrame whose row order matches brand_embeddings
        model: object with an ``encode`` method (sentence-transformers model)
        interacted_indices: list of int row positions of brands already engaged with
        top_k: int, number of rows to return
        alpha: float, weight given to the query embedding; the history average
            receives (1 - alpha)

    Returns:
        DataFrame with the top_k best-scoring brands, index reset to 0..k-1.
    """
    target = model.encode([query])[0]
    history_mean = get_preference_vector(interacted_indices or [], brand_embeddings)
    if history_mean is not None:
        # Linear blend of what was asked for and what was liked before.
        target = alpha * target + (1 - alpha) * history_mean

    # Cosine similarity; the small constant avoids division by zero.
    scale = np.linalg.norm(brand_embeddings, axis=1) * np.linalg.norm(target) + 1e-10
    scores = np.dot(brand_embeddings, target) / scale

    descending = np.argsort(scores)[::-1]
    best = descending[:top_k]
    return brands_df.iloc[best].reset_index(drop=True)


In [10]:
# Replace with real search text and previously interacted brand indices (if any)
# With alpha=0.7 the query embedding is weighted 0.7 and the history average 0.3.
matched_brands = search_brands(
    query="Sustainable fashion campaign Instagram",
    brand_embeddings=brand_embeddings,
    brands_df=brands_df,
    model=model,
    interacted_indices=[1, 5, 9],  # previously engaged brand indices, or [] if none
    top_k=5,
    alpha=0.7
)
# matched_brands now has the top 5 recommended brands


In [11]:
import numpy as np

def get_preference_vector(interacted_indices, brand_embeddings):
    """
    Returns the average embedding of previously engaged brands,
    or None if there is no history.

    ``interacted_indices`` may be None, a single int, or any array-like of row
    positions (list, tuple, NumPy array); ``brand_embeddings`` holds one
    embedding per row.
    """
    if interacted_indices is None:
        return None
    # Normalise to an index array up front: ``not array`` raises for NumPy input,
    # and a tuple would be read as a multi-axis index.
    rows = np.atleast_1d(np.asarray(interacted_indices))
    if rows.size == 0:
        return None
    return np.mean(np.asarray(brand_embeddings)[rows], axis=0)

def search_brands(
        query,
        brand_embeddings,
        brands_df,
        model,
        interacted_indices=None,
        top_k=5,
        alpha=0.6
    ):
    """
    Personalized brand search: query embedding blended with past-interaction signal.

    Parameters:
        query (str): The influencer's query or profile text.
        brand_embeddings (np.ndarray): One embedding per brand row.
        brands_df (pd.DataFrame): Brand details, in the same row order.
        model: Object exposing ``encode`` (sentence-transformers model).
        interacted_indices (list[int]): Row positions of brands engaged with before.
        top_k (int): Number of results to return.
        alpha (float): Share of the query embedding in the blend; the preference
            vector gets the remaining (1 - alpha).

    Returns:
        pd.DataFrame: Best-scoring brands first, with a fresh 0-based index.
    """
    def _cosine_to_all(vector):
        # Similarity of `vector` to every brand row; epsilon guards zero norms.
        row_norms = np.linalg.norm(brand_embeddings, axis=1)
        vector_norm = np.linalg.norm(vector)
        return np.dot(brand_embeddings, vector) / (row_norms * vector_norm + 1e-10)

    encoded_query = model.encode([query])[0]
    preference = get_preference_vector(interacted_indices or [], brand_embeddings)

    if preference is None:
        search_vector = encoded_query
    else:
        search_vector = alpha * encoded_query + (1 - alpha) * preference

    order = np.argsort(_cosine_to_all(search_vector))[::-1][:top_k]
    return brands_df.iloc[order].reset_index(drop=True)


In [15]:
# Current influencer query
influencer_query = "Sustainable Indian skincare campaign for Instagram"

# Example indices of brands the influencer previously engaged with
# (row positions into brand_embeddings)
interacted_indices = [2, 10, 15]  # Replace with actual history or keep as [] for no personalization

# Run personalized search
# alpha=0.6 weights the query embedding at 0.6 and the history average at 0.4.
recommended_brands = search_brands(
    query=influencer_query,
    brand_embeddings=brand_embeddings,
    brands_df=brands_df,
    model=model,
    interacted_indices=interacted_indices,  # use [] for no personalization
    top_k=5,
    alpha=0.6
)

# Choose columns that exist in your dataset
display_cols = ['title', 'description', 'category', 'budget', 'targetAudience', 'requirements', 'metadata']

print("Personalized Brand Matches for Influencer:")
print(recommended_brands[display_cols])



Personalized Brand Matches for Influencer:
                          title                               description  \
0       Davies, Allen and Price        Quality-focused secondary function   
1                Martinez Group   Configurable systemic Internet solution   
2                     Walsh Ltd  Down-sized attitude-oriented methodology   
3       Flores, Thomas and Bush     Horizontal object-oriented encryption   
4  Contreras, Ellis and Gregory               Public-key coherent support   

  category                                          budget  \
0  Fashion  {'min': 4139, 'max': 10548, 'currency': 'INR'}   
1   Beauty   {'min': 4917, 'max': 5079, 'currency': 'INR'}   
2     Tech  {'min': 1195, 'max': 43062, 'currency': 'INR'}   
3  Fitness   {'min': 2000, 'max': 7351, 'currency': 'INR'}   
4  Fitness  {'min': 3013, 'max': 31692, 'currency': 'INR'}   

                                      targetAudience  \
0  {'ageRange': {'min': 23, 'max': 55}, 'gender':...   
1  {'ageR

In [17]:
# --- EVERYTHING IN ONE CELL ---

# 1. Install and import required libraries
!pip install -q sentence-transformers faiss-cpu pandas numpy

import pandas as pd
import numpy as np
import json
from sentence_transformers import SentenceTransformer
import faiss

# 2. Load brand JSON data from the mounted Google Drive folder.
#    The encoding is explicit so the read does not depend on the platform's default codec.
with open('/content/drive/MyDrive/Colab Notebooks/indian_synthetic_brands.json', 'r', encoding='utf-8') as f:
    brands_data = json.load(f)

# One row per brand record; list the columns so later key names can be checked against them.
brands_df = pd.DataFrame(brands_data)
print("Brand data columns:", brands_df.columns.tolist())

# 3. Prepare embedding text for each brand (adjust keys to match your dataset)
def brand_text(row):
    """
    Build the text that is embedded for one brand record.

    title, description and category are taken as they are; budget, targetAudience,
    requirements and metadata are converted with str() first. Empty entries are
    dropped and the rest joined with ' | '.
    """
    as_is_keys = ('title', 'description', 'category')
    stringified_keys = ('budget', 'targetAudience', 'requirements', 'metadata')

    pieces = [row.get(key, '') for key in as_is_keys]
    pieces.extend(str(row.get(key, '')) for key in stringified_keys)
    return ' | '.join(map(str, filter(None, pieces)))

# axis=1 hands each row to brand_text; the result is the text that gets embedded.
brands_df['embedding_text'] = brands_df.apply(brand_text, axis=1)

# 4. Compute brand embeddings
# NOTE(review): this rebinds model and brand_embeddings; the FAISS brand_index created in an
# earlier cell is not rebuilt here and still holds the vectors added at that time.
model = SentenceTransformer('all-MiniLM-L6-v2')
brand_embeddings = model.encode(brands_df['embedding_text'].tolist(), show_progress_bar=True)
# Force a float32 NumPy array regardless of what encode returned.
brand_embeddings = np.array(brand_embeddings).astype('float32')

# 5. Simple contextual search function
def test_contextual_search(
    query,
    brand_embeddings,
    brands_df,
    model,
    top_k=5
):
    query_vec = model.encode([query])[0]
    brand_norms = np.linalg.norm(brand_embeddings, axis=1)
    query_norm = np.linalg.norm(query_vec)
    similarities = np.dot(brand_embeddings, query_vec) / (brand_norms * query_norm + 1e-10)
    top_indices = np.argsort(similarities)[::-1][:top_k]
    return brands_df.iloc[top_indices].reset_index(drop=True)

# 6. Run a test search
# No interaction history is involved here: ranking uses the query embedding only.
test_query = "Organic skincare campaign for Indian teenagers"
top_matches = test_contextual_search(
    query=test_query,
    brand_embeddings=brand_embeddings,
    brands_df=brands_df,
    model=model,
    top_k=5
)

# 7. Display top matches (adjust columns as in your dataset)
display_cols = ['title', 'description', 'category', 'budget', 'targetAudience', 'requirements', 'metadata']
print("\nTop Brand Matches for Query:")
print(top_matches[display_cols])


Brand data columns: ['brandId', 'title', 'description', 'category', 'platforms', 'budget', 'targetAudience', 'requirements', 'timeline', 'status', 'applications', 'selectedInfluencers', 'metadata', 'createdAt', 'updatedAt']


Batches:   0%|          | 0/16 [00:00<?, ?it/s]


Top Brand Matches for Query:
                        title                                description  \
0                Mccarthy Ltd  Multi-lateral attitude-oriented moderator   
1               Jacobs-Foster              Organic zero-defect hierarchy   
2               Carlson-Ayala          Self-enabling reciprocal protocol   
3    Mcneil, Solis and Horton      Face-to-face real-time implementation   
4  Taylor, Weaver and Mcbride           Devolved fault-tolerant solution   

        category                                          budget  \
0  Entertainment  {'min': 2139, 'max': 32153, 'currency': 'INR'}   
1      Education  {'min': 3649, 'max': 36593, 'currency': 'INR'}   
2        Fashion  {'min': 2592, 'max': 16163, 'currency': 'INR'}   
3  Entertainment   {'min': 964, 'max': 40779, 'currency': 'INR'}   
4        Fitness  {'min': 4527, 'max': 49047, 'currency': 'INR'}   

                                      targetAudience  \
0  {'ageRange': {'min': 24, 'max': 46}, 'gender'

In [18]:
# --- Full influencer-to-brand recommendation code ---

import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer

# Assume these are pre-loaded in your environment:
# model: Loaded SentenceTransformer model
# brand_embeddings: numpy array of brand embeddings
# brands_df: DataFrame containing brand information

def influencer_text(influencer):
    """
    Turn an influencer profile dictionary into one ' | '-joined line of text.

    Descriptive keys are read as-is, the two numeric keys are passed through str()
    before the emptiness check, and blank entries are omitted.
    Adjust the keys to match your influencer data.
    """
    collected = []
    for key in ('bio', 'categories', 'platform', 'location', 'language', 'interests'):
        collected.append(influencer.get(key, ''))
    for key in ('follower_count', 'engagement_rate'):
        collected.append(str(influencer.get(key, '')))

    kept = filter(None, collected)
    return ' | '.join(map(str, kept))

def get_preference_vector(interacted_indices, brand_embeddings):
    """
    Compute average embedding of interacted brands for personalization.

    Accepts None, a single int, or any array-like of row positions (list, tuple,
    NumPy array). Returns None when there is no history, otherwise the mean of
    the selected rows of ``brand_embeddings``.
    """
    if interacted_indices is None:
        return None
    # ``not <ndarray>`` raises and a tuple would index several axes, so coerce to a
    # flat index array before testing for emptiness.
    picked = np.atleast_1d(np.asarray(interacted_indices))
    if picked.size == 0:
        return None
    return np.mean(np.asarray(brand_embeddings)[picked], axis=0)

def recommend_brands_for_influencer(
    influencer_profile,
    brand_embeddings,
    brands_df,
    model,
    interacted_indices=None,
    top_k=5,
    alpha=0.6
):
    """
    Recommend brands for an influencer from their profile and, optionally, their history.

    Parameters:
    - influencer_profile: dict of influencer details (see influencer_text for the keys)
    - brand_embeddings: numpy array, one embedding per brand row
    - brands_df: DataFrame of brand data in the same row order
    - model: embedding model exposing ``encode``
    - interacted_indices: optional list of row positions of brands already engaged with
    - top_k: number of recommended brands to return
    - alpha: weight of the profile embedding; the history average gets (1 - alpha)

    Returns:
    - DataFrame of the top recommended brands, best first, index reset
    """
    def _best_rows(vector):
        # Cosine similarity against every brand, then the top_k positions, best first.
        scale = np.linalg.norm(brand_embeddings, axis=1) * np.linalg.norm(vector) + 1e-10
        cosine = np.dot(brand_embeddings, vector) / scale
        return np.argsort(cosine)[::-1][:top_k]

    profile_vec = model.encode([influencer_text(influencer_profile)])[0]
    history_vec = get_preference_vector(interacted_indices or [], brand_embeddings)

    if history_vec is None:
        # No usable history: rank on the profile alone.
        chosen = _best_rows(profile_vec)
    else:
        chosen = _best_rows(alpha * profile_vec + (1 - alpha) * history_vec)
    return brands_df.iloc[chosen].reset_index(drop=True)

# ===== Example usage =====
# Define an example influencer profile (update keys to your data structure)
influencer_example = {
    "bio": "Comedy content creator passionate about tech gadgets.",
    "categories": "Comedy, Technology",
    "platform": "TikTok",
    "location": "India",
    "language": "Hindi",
    "interests": "Comedy, Gadgets, Tech",
    "follower_count": 15000,
    "engagement_rate": 6.5
}

# Example: No prior interactions (new influencer)
# With an empty history the ranking uses the profile embedding alone, so alpha has no effect.
interacted = []

# Get recommendations
recommended_brands = recommend_brands_for_influencer(
    influencer_profile=influencer_example,
    brand_embeddings=brand_embeddings,
    brands_df=brands_df,
    model=model,
    interacted_indices=interacted,
    top_k=5,
    alpha=0.6
)

# Specify columns present in your dataset to display
display_cols = ['title', 'description', 'category', 'budget', 'targetAudience', 'requirements', 'metadata']

print("Recommended Brands for Influencer:")
print(recommended_brands[display_cols])


Recommended Brands for Influencer:
                            title                             description  \
0        Camacho, Wells and Keith  Phased exuding artificial intelligence   
1                   Adkins-Obrien           Operative tangible neural-net   
2  Jones, Valenzuela and Phillips    Profit-focused multimedia moratorium   
3                    Turner-Reese            Versatile national emulation   
4                    Tucker-Ramos   Future-proofed didactic system engine   

        category                                          budget  \
0         Gaming  {'min': 3307, 'max': 34719, 'currency': 'INR'}   
1           Tech  {'min': 3558, 'max': 14265, 'currency': 'INR'}   
2  Entertainment  {'min': 2539, 'max': 12280, 'currency': 'INR'}   
3      Education   {'min': 743, 'max': 34005, 'currency': 'INR'}   
4           Food  {'min': 4079, 'max': 31216, 'currency': 'INR'}   

                                      targetAudience  \
0  {'ageRange': {'min': 19, 'max': 44

In [19]:
# 1. Function to build influencer text from user input
def user_influencer_input():
    """
    Prompt on stdin for each influencer field and return the answers as a dict.

    Every value is returned exactly as typed (a string, possibly empty); the keys
    are the ones influencer_text() reads.
    """
    print('Enter influencer details. Leave blank for any field you wish to skip.\n')

    # (profile key, prompt shown to the user), asked in this order.
    questions = (
        ("bio", "Bio: "),
        ("categories", "Categories (comma-separated): "),
        ("platform", "Platform: "),
        ("location", "Location: "),
        ("language", "Language: "),
        ("interests", "Interests (comma-separated): "),
        ("follower_count", "Follower count (number): "),
        ("engagement_rate", "Engagement rate (number): "),
    )

    profile = {}
    for key, prompt in questions:
        profile[key] = input(prompt)
    return profile

def influencer_text(influencer):
    """
    Join the non-empty parts of an influencer profile with ' | '.

    follower_count and engagement_rate are stringified before the emptiness check
    (so 0 survives); the other keys are dropped when falsy.
    """
    def _parts():
        # Descriptive fields first, then the two numeric ones as text.
        for key in ('bio', 'categories', 'platform', 'location', 'language', 'interests'):
            yield influencer.get(key, '')
        for key in ('follower_count', 'engagement_rate'):
            yield str(influencer.get(key, ''))

    return ' | '.join(str(part) for part in _parts() if part)

def get_preference_vector(interacted_indices, brand_embeddings):
    """
    Mean embedding of the brands at ``interacted_indices``, or None without history.

    ``interacted_indices`` may be None, a single int, or any array-like of row
    positions (list, tuple, NumPy array).
    """
    if interacted_indices is None:
        return None
    # Coerce before the emptiness test: truth-testing an ndarray raises, and a tuple
    # would be taken as a multi-axis index.
    positions = np.atleast_1d(np.asarray(interacted_indices))
    if positions.size == 0:
        return None
    return np.mean(np.asarray(brand_embeddings)[positions], axis=0)

def recommend_brands_for_influencer(
    influencer_profile,
    brand_embeddings,
    brands_df,
    model,
    interacted_indices=None,
    top_k=5,
    alpha=0.6
):
    """
    Rank brands for an influencer profile, blending in past interactions when given.

    The profile is turned into text, embedded, and (if there is history) mixed with
    the mean embedding of the interacted brands: alpha for the profile, 1 - alpha
    for the history. Returns the top_k rows of brands_df by cosine similarity,
    best first, with a reset index.
    """
    profile_text = influencer_text(influencer_profile)
    search_vec = model.encode([profile_text])[0]

    liked_vec = get_preference_vector(interacted_indices or [], brand_embeddings)
    if liked_vec is not None:
        search_vec = alpha * search_vec + (1 - alpha) * liked_vec

    # Cosine similarity to every brand; epsilon avoids dividing by zero.
    row_norms = np.linalg.norm(brand_embeddings, axis=1)
    scale = row_norms * np.linalg.norm(search_vec) + 1e-10
    scores = np.dot(brand_embeddings, search_vec) / scale

    winners = np.argsort(scores)[::-1][:top_k]
    return brands_df.iloc[winners].reset_index(drop=True)

# 2. Collect user input for an influencer
#    (blocks on stdin; all answers come back as strings)
user_influencer = user_influencer_input()
#    (Optionally, provide indices of brands they interacted with for personalization)
interacted_indices = []  # e.g. [2, 10, 15]

# 3. Recommend and display brands
recommended_brands = recommend_brands_for_influencer(
    influencer_profile=user_influencer,
    brand_embeddings=brand_embeddings,
    brands_df=brands_df,
    model=model,
    interacted_indices=interacted_indices,
    top_k=5,
    alpha=0.6
)
display_cols = ['title', 'description', 'category', 'budget', 'targetAudience', 'requirements', 'metadata']
print("\nRecommended Brands:")
print(recommended_brands[display_cols])


Enter influencer details. Leave blank for any field you wish to skip.

Bio: beauty
Categories (comma-separated): makeup
Platform: instagram
Location: india
Language: hindhi
Interests (comma-separated): beauty, selfcare, makeup
Follower count (number): 2000
Engagement rate (number): 6000

Recommended Brands:
                            title                                description  \
0    Carter, Armstrong and Farley               Profound systematic attitude   
1                        Khan Ltd    Multi-lateral well-modulated initiative   
2     Johnston, Griffin and Myers           Visionary systematic parallelism   
3  Jones, Valenzuela and Phillips       Profit-focused multimedia moratorium   
4                    Mccarthy Ltd  Multi-lateral attitude-oriented moderator   

        category                                          budget  \
0           Food   {'min': 906, 'max': 15677, 'currency': 'INR'}   
1        Fitness  {'min': 4795, 'max': 14759, 'currency': 'INR'}   
2     

In [22]:
# --- Filter brands by category entered by user ---

def category_search(category, brands_df):
    """
    Returns all brands whose 'category' field includes the input string (case-insensitive).

    Parameters:
        category: text to look for; surrounding whitespace is ignored.
        brands_df: DataFrame with a 'category' column.

    Returns:
        DataFrame of the matching rows with a fresh 0-based index.

    The input is matched literally, so characters such as '(', '.' or '+' typed by
    a user are not interpreted as a regular expression. Rows without a category
    never match a non-empty search.
    """
    needle = category.strip().lower()
    # fillna first: otherwise missing categories become the searchable text 'nan'/'none'.
    haystack = brands_df['category'].fillna('').astype(str).str.lower()
    mask = haystack.str.contains(needle, regex=False)
    return brands_df[mask].reset_index(drop=True)

# Prompt user for category input
user_category = input("Enter influencer's category of interest: ").strip()

# Perform the category search
# NOTE: this rebinds matched_brands, which an earlier cell used for search_brands results.
matched_brands = category_search(user_category, brands_df)

# Select columns to display (adjust as needed)
display_cols = ['title', 'description', 'category', 'budget', 'targetAudience', 'requirements', ]

# Show results
if matched_brands.empty:
    print(f"No brands found for category: '{user_category}'")
else:
    print(f"\nBrands matching category '{user_category}':\n")
    print(matched_brands[display_cols])


Enter influencer's category of interest: beauty

Brands matching category 'beauty':

                     title                          description category  \
0           Sanchez-Taylor  Customer-focused systematic support   Beauty   
1                Cohen Inc      Focused 5thgeneration workforce   Beauty   
2        Burgess-Patterson          Reduced impactful hierarchy   Beauty   
3             Lee and Sons          Centralized optimal support   Beauty   
4              Gardner LLC              Reduced neutral circuit   Beauty   
..                     ...                                  ...      ...   
65              Walton LLC        Organic multi-state help-desk   Beauty   
66  Ochoa, Sutton and Hill  Triple-buffered real-time challenge   Beauty   
67           Jacobs-Wright          Managed non-volatile matrix   Beauty   
68         Lawson and Sons   Implemented bottom-line monitoring   Beauty   
69              Jensen Inc   Integrated disintermediate support   Beauty   

  

In [23]:
# Save the transformer model
# (written under the relative name 'brand_embedding_model' in the current working directory)
model.save('brand_embedding_model')

# Save the brand embeddings
np.save('brand_embeddings.npy', brand_embeddings)

# Save the brand dataframe as CSV (pickle for advanced types)
# NOTE(review): columns that hold dicts/lists (e.g. budget, targetAudience) are written as
# their text form, so a plain read_csv gives strings back for them — confirm that is acceptable.
brands_df.to_csv('brands_df.csv', index=False)


In [24]:
import ast

from sentence_transformers import SentenceTransformer
import numpy as np
import pandas as pd

# Load sentence-transformer model
model = SentenceTransformer('brand_embedding_model')

# Load embeddings
brand_embeddings = np.load('brand_embeddings.npy')

# Load DataFrame
brands_df = pd.read_csv('brands_df.csv')


def _restore_nested(value):
    """Turn a CSV cell holding the text form of a dict/list back into that object.

    Anything that is not such text (or fails to parse) is returned unchanged.
    """
    if not isinstance(value, str):
        return value
    text = value.strip()
    if not text.startswith(('{', '[')):
        return value
    try:
        # literal_eval only accepts Python literals, so it is safe on file content.
        parsed = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return value
    return parsed if isinstance(parsed, (dict, list)) else value


# CSV has no nested types: dict/list columns come back as strings, so parse them
# again to give the reloaded frame the same value types as the one that was saved.
for _column in brands_df.columns:
    if not pd.api.types.is_numeric_dtype(brands_df[_column]):
        brands_df[_column] = brands_df[_column].map(_restore_nested)


In [25]:
def predict_brands_from_input(user_input, model, brand_embeddings, brands_df, top_k=5):
    """
    Return the top_k brands most cosine-similar to a free-text input.

    Parameters:
        user_input: text describing the influencer or campaign interest.
        model: object exposing ``encode`` (sentence-transformers model).
        brand_embeddings: 2-D numpy array, one embedding per row of brands_df.
        brands_df: DataFrame of brand details.
        top_k: number of rows to return.

    Returns:
        DataFrame ordered best match first, with a fresh 0-based index.
    """
    encoded = model.encode([user_input])
    query = encoded[0]

    # Cosine similarity; the epsilon protects against zero-length vectors.
    scale = np.linalg.norm(brand_embeddings, axis=1) * np.linalg.norm(query) + 1e-10
    scores = np.dot(brand_embeddings, query) / scale

    ascending = np.argsort(scores)
    winners = ascending[::-1][:top_k]
    return brands_df.iloc[winners].reset_index(drop=True)


In [27]:
# Take input from user (could use influencer bio, interests, etc.)
user_input = input("Enter influencer's details or campaign interest: ")

# Get top matching brands
# (uses the model, embeddings and DataFrame reloaded from disk in the earlier cell)
top_brands = predict_brands_from_input(
    user_input,
    model,
    brand_embeddings,
    brands_df,
    top_k=5
)

# Display result columns as appropriate
display_cols = ['title', 'description', 'category', 'budget', 'targetAudience', 'requirements',]
print("\nPredicted Brands for User Input:")
print(top_brands[display_cols])


Enter influencer's details or campaign interest: gameing

Predicted Brands for User Input:
              title                                     description category  \
0     Parker-Cooper                        Multi-layered 24/7 model   Gaming   
1       Hickman Ltd               Team-oriented dedicated structure   Gaming   
2  Reynolds-Frazier  Switchable homogeneous artificial intelligence   Gaming   
3      Sparks-Eaton                   Adaptive motivating challenge   Travel   
4   Gregory-Compton           Team-oriented client-driven groupware   Gaming   

                                           budget  \
0  {'min': 3658, 'max': 23587, 'currency': 'INR'}   
1  {'min': 3834, 'max': 17479, 'currency': 'INR'}   
2   {'min': 4564, 'max': 6933, 'currency': 'INR'}   
3  {'min': 1612, 'max': 36556, 'currency': 'INR'}   
4  {'min': 1381, 'max': 34460, 'currency': 'INR'}   

                                      targetAudience  \
0  {'ageRange': {'min': 21, 'max': 63}, 'gender':... 