By [Yulandy Chiu](https://www.youtube.com/@YulandySpace)

Created with the aid of Gemini/Claude/ChatGPT and modified by Yulandy Chiu

Version: 2024/12/24

Video: [RAG implementation using Google Search and Gemini](https://youtu.be/cmHsOdlieXY)


 This code is licensed under the Creative Commons Attribution-NonCommercial 4.0
 International License (CC BY-NC 4.0). You are free to use, modify, and share this code for non-commercial purposes, provided you give appropriate credit. For more details, see the LICENSE file or visit: https://creativecommons.org/licenses/by-nc/4.0/
 © [2024] [Yulandy Chiu](https://www.youtube.com/@YulandySpace)



In [None]:
# Smoke test: send a single prompt to Google Gemini and print the reply.
import google.generativeai as genai
from google.colab import userdata

# The API key is read from the Colab secret named 'Gemini_API_Key'.
api_key = userdata.get('Gemini_API_Key')
genai.configure(api_key=api_key)

model = genai.GenerativeModel(model_name="gemini-1.5-flash")
response = model.generate_content("Explain how AI works")

# Alternative prompt to try:
# response = model.generate_content("Provide Taiwanese baseball news from 2024")
print(response.text)

In [None]:
# Step 1: Extract a query from the prompt.
# The extracted query is later used as the keywords for the Google search.

user_query = (
    "Based on the Taiwanese baseball news in 2024, "
    "generate a long-form story."
)


import google.generativeai as genai

def generate_gemini_response(llm_input, api_key, model_name="gemini-1.5-flash"):
    """
    Creates a text response using the Google Gemini API.

    Any failure while configuring the client, creating the model, or
    generating content is reported with print() and results in None, so
    callers must check the return value before using it as a string.

    Args:
        llm_input: The user's input, including the query and any retrieved data.
        api_key: The API key for the Google Gemini API.
        model_name: The name of the Gemini model to use. Defaults to "gemini-1.5-flash".
    Returns:
        The generated response text from the Gemini model, or None on error.
    """
    try:
        # Client setup is inside the try block so that every failure mode
        # is handled the same way as a failed generation request.
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(model_name=model_name)
        response = model.generate_content(llm_input)
        return response.text
    except Exception as e:
        print(f"Error during Gemini response generation: {e}")
        return None

# parameters
from google.colab import userdata

api_key = userdata.get('Gemini_API_Key')
model_name = "gemini-1.5-flash"

# Prompt that asks Gemini to turn the user's request into search keywords.
# The pieces are joined without separators, exactly as written.
llm_input = (
    "Analyze the required information that needs to be searched online due to insufficient internal knowledge."
    "Generate keywords for the required online search information "
    f"(no explanation needed, the returned data can be directly copied and pasted): {user_query} "
)

Google_Search_Keywords = generate_gemini_response(llm_input, api_key, model_name)
print(Google_Search_Keywords)

In [None]:
# Step 2: Realize ``Retrievel'' using Google Search
!pip install google-search-results

from googleapiclient.discovery import build

def search_google_custom(query, api_key, cse_id, num_results):
    """
    Performs a search using the Google Custom Search API.

    Any failure (building the client, sending the request, or reading the
    response) is reported with print() and results in an empty list, so
    callers can always iterate over the return value.

    Args:
        query: The user's search query. An empty or missing query returns []
            without contacting the API.
        api_key: The Google API key.
        cse_id: The ID of the Google Custom Search Engine.
        num_results: The number of search results to return; it is passed to
            the API as `num` (the API documents 1-10 as the valid range).
    Returns:
        A list of dicts, one per search result, with the keys "title",
        "link", and "snippet" (a missing field becomes "").
    """
    # Nothing to search for, e.g. when keyword generation failed upstream.
    if not query or not str(query).strip():
        print("Error during Google Custom Search: empty query")
        return []

    try:
        # Building the client is inside the try block so that its failures
        # are handled the same way as request failures.
        service = build("customsearch", "v1", developerKey=api_key)
        response = (
            service.cse()
            .list(q=query, cx=cse_id, num=num_results)
            .execute()
        )
        # "items" is absent when the search has no hits.
        return [
            {
                "title": item.get("title", ""),
                "link": item.get("link", ""),
                "snippet": item.get("snippet", ""),
            }
            for item in response.get("items", [])
        ]
    except Exception as e:
        print(f"Error during Google Custom Search: {e}")
        return []


# parameters
from google.colab import userdata

# Use a dedicated name for the search key so that the Gemini key stored in
# `api_key` (Step 1) is not overwritten.
search_api_key = userdata.get('Google_Search_API_Key')
cse_id = userdata.get('Google_CSE_ID')
query = Google_Search_Keywords
search_results = search_google_custom(query, search_api_key, cse_id, num_results=5)

# Show what was retrieved.
if search_results:
    for result in search_results:
        print(f"Title: {result['title']}")
        print(f"Link: {result['link']}")
        print(f"Snippet: {result['snippet']}")
        print("-" * 30)
else:
    print("No search results found.")


# Create LLM input: a header line followed by the title and snippet of each result.
Retrieval_content = "Below is the information retrieved from the Internet:\n" + "".join(
    f"Title：{result['title']}\nSnippet：{result['snippet']}\n"
    for result in search_results
)
print(Retrieval_content)


In [None]:
# Step 3: Realize ``Augmented Generation''
from google.colab import userdata

# augmented
# Only the retrieved content is truncated (to its first 4000 characters);
# the user's query is always included in full.
llm_input = f"Prompt:\n{user_query}\n{Retrieval_content[:4000]}"
print(llm_input)

# generation
# Fetch the Gemini key explicitly so this step does not depend on which key
# the shared `api_key` variable currently holds.
gemini_api_key = userdata.get('Gemini_API_Key')
gemini_output = generate_gemini_response(llm_input, gemini_api_key, model_name)

# generate_gemini_response returns None on failure; concatenating None with
# a string would raise TypeError.
if gemini_output is None:
    print("Gemini output:\n(no response was generated)")
else:
    print(f"Gemini output:\n" + gemini_output)

In [None]:
# Complete RAG pipeline (all steps in a single cell)
# NOTE(review): `%reset` is an IPython magic; `-sf` presumably clears the
# notebook namespace without a confirmation prompt so that this cell starts
# from a clean state — confirm against the IPython documentation.
%reset -sf

# The request that the whole pipeline answers.
user_query = (
    "Generate a long story based on the news of Donald Trump"
    " being elected as the President of the United States in 2024"
)


import google.generativeai as genai

def generate_gemini_response(llm_input, api_key, model_name="gemini-1.5-flash"):
    """
    Generate a response using the Google Gemini API.

    Any failure while configuring the client, creating the model, or
    generating content is reported with print() and results in None, so
    callers must check the return value before using it as a string.

    Args:
       llm_input: User input (including questions and retrieved data).
       api_key: API key for the Google Gemini API.
       model_name: Name of the Gemini model, default is "gemini-1.5-flash".
    Returns:
       The response text generated by the Gemini model, or None on error.
    """
    try:
        # Client setup is inside the try block so that every failure mode
        # is handled the same way as a failed generation request.
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(model_name=model_name)
        response = model.generate_content(llm_input)
        return response.text
    except Exception as e:
        print(f"Error during Gemini response generation: {e}")
        return None

# parameters
from google.colab import userdata

api_key = userdata.get('Gemini_API_Key')
model_name = "gemini-1.5-flash"

# Prompt that asks Gemini to turn the user's request into search keywords.
# The pieces are joined without separators, exactly as written.
llm_input = (
    "Analyze the required information that needs to be searched online due to insufficient internal knowledge."
    "Generate keywords for the required online search information "
    f"(no explanation needed, the returned data can be directly copied and pasted): {user_query} "
)

Google_Search_Keywords = generate_gemini_response(llm_input, api_key, model_name)
# print(Google_Search_Keywords)

# Step 2: Realize ``Retrieval'' using Google Search
# !pip install google-api-python-client

from googleapiclient.discovery import build

def search_google_custom(query, api_key, cse_id, num_results):
    """
    Performs a search using the Google Custom Search API.

    Any failure (building the client, sending the request, or reading the
    response) is reported with print() and results in an empty list, so
    callers can always iterate over the return value.

    Args:
        query: The user's search query. An empty or missing query returns []
            without contacting the API.
        api_key: The Google API key.
        cse_id: The ID of the Google Custom Search Engine.
        num_results: The number of search results to return; it is passed to
            the API as `num` (the API documents 1-10 as the valid range).
    Returns:
        A list of dicts, one per search result, with the keys "title",
        "link", and "snippet" (a missing field becomes "").
    """
    # Nothing to search for, e.g. when keyword generation failed upstream.
    if not query or not str(query).strip():
        print("Error during Google Custom Search: empty query")
        return []

    try:
        # Building the client is inside the try block so that its failures
        # are handled the same way as request failures.
        service = build("customsearch", "v1", developerKey=api_key)
        response = (
            service.cse()
            .list(q=query, cx=cse_id, num=num_results)
            .execute()
        )
        # "items" is absent when the search has no hits.
        return [
            {
                "title": item.get("title", ""),
                "link": item.get("link", ""),
                "snippet": item.get("snippet", ""),
            }
            for item in response.get("items", [])
        ]
    except Exception as e:
        print(f"Error during Google Custom Search: {e}")
        return []


# parameters
from google.colab import userdata

# Use a dedicated name for the search key so that it is never mistaken for
# the Gemini key when the generation step runs below.
search_api_key = userdata.get('Google_Search_API_Key')
cse_id = userdata.get('Google_CSE_ID')
query = Google_Search_Keywords
search_results = search_google_custom(query, search_api_key, cse_id, num_results=5)

# Create LLM input: a header line followed by the title and snippet of each result.
Retrieval_content = "Below is the information retrieved from the Internet:\n" + "".join(
    f"Title：{result['title']}\nSnippet：{result['snippet']}\n"
    for result in search_results
)


# Step 3: Realize ``Augmented Generation''

# augmented
# Only the retrieved content is truncated (to its first 4000 characters);
# the user's query is always included in full.
llm_input = f"Prompt:\n{user_query}\n{Retrieval_content[:4000]}"
# print(llm_input)

# generation
# Fetch the Gemini key explicitly so the request is not sent with the
# Google Search key.
gemini_api_key = userdata.get('Gemini_API_Key')
gemini_output = generate_gemini_response(llm_input, gemini_api_key, model_name)

# generate_gemini_response returns None on failure; concatenating None with
# a string would raise TypeError.
if gemini_output is None:
    print("Gemini output:\n(no response was generated)")
else:
    print(f"Gemini output:\n" + gemini_output)