In [1]:
!pip install langchain google-generativeai matplotlib sentence-transformers scikit-learn plotly numpy
!pip install langchain-google-genai

Collecting langchain-google-genai
  Downloading langchain_google_genai-2.0.8-py3-none-any.whl.metadata (3.6 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain-google-genai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Downloading langchain_google_genai-2.0.8-py3-none-any.whl (41 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.5/41.5 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)
Installing collected packages: filetype, langchain-google-genai
Successfully installed filetype-1.2.0 langchain-google-genai-2.0.8


In [2]:
import os
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA
from sentence_transformers import SentenceTransformer
import plotly.graph_objects as go

In [3]:
from google.colab import userdata
GOOGLE_API_KEY = userdata.get('google_api_key')

genai.configure(api_key=GOOGLE_API_KEY)

In [4]:
def generate_keywords(idea_description, max_tokens=100):
    """
    Generates context-aware keywords from an idea description using Gemini 1.5 Flash.
    Args:
        idea_description (str): The description of the idea.
        max_tokens (int, optional): Maximum number of tokens for the response. Defaults to 100.

    Returns:
        list: A list of extracted keywords.
    """
    llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=GOOGLE_API_KEY, max_output_tokens=max_tokens)

    prompt_template = """
    You are a keyword extractor. Your goal is to extract keywords from the given text and output them as a comma-separated list.
    Consider the overall context of the text to make sure the keywords are relevant to the main ideas.

    Text:
    {idea_description}

    Keywords:
    """
    prompt = PromptTemplate.from_template(prompt_template)

    chain = LLMChain(llm=llm, prompt=prompt)
    response = chain.run(idea_description = idea_description)

    #split the comma separated string into a list
    keywords = [keyword.strip() for keyword in response.split(',')]

    return keywords

In [5]:
def get_keyword_embeddings(keywords):
    """
    Generates sentence embeddings for keywords using SentenceTransformer.

    Args:
        keywords (list): List of keywords.

    Returns:
        numpy.ndarray: Array of keyword embeddings.
    """
    model = SentenceTransformer('all-MiniLM-L6-v2')
    embeddings = model.encode(keywords)
    return embeddings

In [9]:
def reduce_to_2d(embeddings):
    """
    Reduces the dimensionality of embeddings to 2D using PCA.

    Args:
        embeddings (numpy.ndarray): Array of keyword embeddings.

    Returns:
        numpy.ndarray: Array of 2D embeddings.
    """
    pca = PCA(n_components=2)
    reduced_embeddings = pca.fit_transform(embeddings)
    return reduced_embeddings

In [7]:
def plot_2d_keywords(keywords, embeddings, output_path="keywords_2d_plot.html"):
    """
    Plots keywords as points in a 2D space using plotly.
    Args:
        keywords (list): List of keywords.
        embeddings (numpy.ndarray): Array of 3D keyword embeddings.
        output_path (str, optional): Path to save the plot HTML file. Defaults to "keywords_2d_plot.html".
    """
    if not keywords or embeddings.size == 0:
        print("No keywords or embeddings to plot.")
        return

    # Use only the first two dimensions for 2D plotting
    x, y = embeddings[:, 0], embeddings[:, 1]

    fig = go.Figure(data=[go.Scatter(
        x=x,
        y=y,
        mode='markers+text',
        text=keywords,
        textposition="top center",
        marker=dict(
            size=15,
            color=np.arange(len(keywords)),
            colorscale='Viridis',
            opacity=0.8
        )
    )])

    fig.update_layout(
        showlegend=False,
        xaxis=dict(visible=False, showticklabels=False),
        yaxis=dict(visible=False, showticklabels=False),
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        margin=dict(l=0, r=0, b=0, t=0),
        height=600,
        width=600
    )

    fig.write_html(output_path)
    fig.show()

In [8]:
def main():
    idea_description = input("Enter your idea description: ")

    keywords = generate_keywords(idea_description)
    print("\nExtracted Keywords:", keywords)

    if not keywords:
        return

    keyword_embeddings = get_keyword_embeddings(keywords)
    reduced_embeddings = reduce_to_2d(keyword_embeddings)

    plot_2d_keywords(keywords, reduced_embeddings)


if __name__ == "__main__":
    main()

Enter your idea description: Imagine a community-powered disaster resilience network called "NeighborShield" that revolutionizes how neighborhoods prepare for and respond to natural disasters. The system would combine smart home technology, artificial intelligence, and social connectivity to create a decentralized support structure that activates before, during, and after emergencies. Each household would install low-cost sensors that monitor environmental conditions and structural integrity, while a mobile app would facilitate resource sharing, skill mapping, and real-time communication among neighbors. During normal times, the platform would gamify emergency preparedness by organizing community drills, maintaining shared resource inventories, and facilitating skill-sharing workshops where neighbors can learn essential skills like first aid or basic repairs. When disasters strike, the AI-powered system would automatically assess damage patterns, identify vulnerable residents, and coor

  chain = LLMChain(llm=llm, prompt=prompt)
  response = chain.run(idea_description = idea_description)



Extracted Keywords: ['NeighborShield', 'disaster resilience', 'community-powered network', 'smart home technology', 'artificial intelligence', 'social connectivity', 'emergency preparedness', 'resource sharing', 'skill mapping', 'real-time communication', 'disaster response', 'damage assessment', 'volunteer coordination', 'community rebuilding', 'climate uncertainty', 'social capital']


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]