## Installation

In [1]:
# Install the SerpAPI client packages used by this notebook (a later cell runs
# `from serpapi import GoogleSearch`).
# NOTE(review): versions are unpinned; the recorded run below resolved
# serpapi-0.1.5 and google-search-results-2.4.2 -- consider pinning them.
! pip install serpapi google-search-results

Collecting serpapi
  Downloading serpapi-0.1.5-py2.py3-none-any.whl.metadata (10 kB)
Collecting google-search-results
  Downloading google_search_results-2.4.2.tar.gz (18 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading serpapi-0.1.5-py2.py3-none-any.whl (10 kB)
Building wheels for collected packages: google-search-results
  Building wheel for google-search-results (setup.py) ... [?25l[?25hdone
  Created wheel for google-search-results: filename=google_search_results-2.4.2-py3-none-any.whl size=32009 sha256=d8f18d13d823fa4200e0aeadaac7ec43d1e4b138e021d0e647770812ec73e947
  Stored in directory: /root/.cache/pip/wheels/6e/42/3e/aeb691b02cb7175ec70e2da04b5658d4739d2b41e5f73cd06f
Successfully built google-search-results
Installing collected packages: serpapi, google-search-results
Successfully installed google-search-results-2.4.2 serpapi-0.1.5


## Helper Function: Extract BibTeX

In [2]:
import requests
from typing import Dict, Optional

def get_bibtex_link(result_id: str, serpapi_key: str, timeout: float = 30.0) -> Optional[str]:
    """
    Look up the BibTeX export URL for one Google Scholar result.

    Sends a ``google_scholar_cite`` request to SerpAPI for ``result_id`` and scans
    the ``links`` array of the JSON response for the entry whose ``name`` is
    ``"BibTeX"``.

    This is a best-effort lookup: any failure (network error, timeout, non-200
    status, body that is not a JSON object, no matching link) yields ``None``
    instead of raising.

    :param result_id: The article's unique result_id from a prior 'google_scholar' call.
    :param serpapi_key: Your SerpAPI API key.
    :param timeout: Seconds to wait for SerpAPI before giving up (default 30).
    :return: A URL string pointing to the .bib resource, or None if not found.
    """
    cite_url = "https://serpapi.com/search.json"
    params = {
        "engine": "google_scholar_cite",
        "q": result_id,
        "api_key": serpapi_key,
    }

    # Without an explicit timeout `requests` may wait indefinitely on a stalled
    # connection; connection problems are folded into the documented None result.
    try:
        response = requests.get(cite_url, params=params, timeout=timeout)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None

    # A 200 response is not guaranteed to carry valid JSON; decoding errors are
    # ValueError subclasses.
    try:
        data = response.json()
    except ValueError:
        return None
    if not isinstance(data, dict):
        return None

    # The first entry named "BibTeX" decides the result, even if it lacks a "link".
    for link_item in data.get("links") or []:
        if isinstance(link_item, dict) and link_item.get("name") == "BibTeX":
            return link_item.get("link")

    return None

In [3]:
import requests
from typing import Dict, Optional

def get_bibtex_link_and_content(result_id: str, serpapi_key: str, timeout: float = 30.0) -> Optional[Dict[str, str]]:
    """
    Resolve the BibTeX URL for a Google Scholar result and download its text.

    The URL lookup is delegated to ``get_bibtex_link`` (a ``google_scholar_cite``
    request via SerpAPI); the URL it returns is then fetched with a plain GET.

    The returned dictionary has two keys:
      - "url": the BibTeX URL.
      - "content": the body of the response fetched from that URL.

    This is best-effort: if no link is found, the download fails or times out,
    or the download returns a non-200 status, the function returns None.

    :param result_id: The article's unique result_id from a prior 'google_scholar' call.
    :param serpapi_key: Your SerpAPI API key.
    :param timeout: Seconds to wait for the .bib download before giving up (default 30).
    :return: A dictionary { "url": str, "content": str }, or None if not found/failed.
    """
    # Reuse the single-purpose helper rather than repeating the SerpAPI request
    # and link-scanning logic here.
    bibtex_url = get_bibtex_link(result_id, serpapi_key)
    if not bibtex_url:
        return None

    # Bound the download time and treat transport errors like any other failure.
    try:
        bibtex_resp = requests.get(bibtex_url, timeout=timeout)
    except requests.RequestException:
        return None
    if bibtex_resp.status_code != 200:
        return None

    return {
        "url": bibtex_url,
        "content": bibtex_resp.text,
    }

## Main Function: `get_scholar_bibtex_list`

In [4]:
from serpapi import GoogleSearch
from typing import List, Dict

def get_scholar_bibtex_list(topic: str, api_key: str, max_results: int = 5) -> List[Dict[str, object]]:
    """
    Search Google Scholar (via SerpAPI) for a topic and collect citation data
    for the leading results.

    For each of the first ``max_results`` organic results that has a
    ``result_id``, a ``google_scholar_cite`` request is issued. Results whose
    cite response has no ``citations``, or whose first citation snippet is
    blank, are skipped.

    Each returned dictionary has these keys:
      - "abstract": the search-result snippet ("" if absent).
      - "result_id": the Scholar result id used for the cite request.
      - "cite_results": the full cite response dictionary.
      - "biburl": URL of the BibTeX export, or None if no such link was found.
      - "biburl_content": text downloaded from that URL, or "" if unavailable.
      - "bibtex": the first citation's snippet, with literal ``\\n`` / ``\\t``
        sequences converted to real newlines/tabs.

    NOTE(review): in this notebook's recorded output the "bibtex" value is a
    formatted reference rather than an @article block, so the
    ``citation_format`` request parameter does not appear to take effect --
    confirm against the SerpAPI documentation. Use "biburl_content" for BibTeX.

    NOTE(review): only the result list of a single search request is used;
    if SerpAPI returns fewer entries per page than ``max_results``, paging or
    a page-size parameter would be needed -- confirm the default page size.

    :param topic: The search query/topic for which to retrieve scholarly articles.
    :param api_key: Your SerpAPI key used for authentication (used for every request).
    :param max_results: The maximum number of search results to inspect (default = 5).
        Negative values are treated as 0.
    :return: A list of dictionaries with the keys described above.
    """
    # 1. Query Google Scholar for articles
    search_params: Dict[str, str] = {
        "engine": "google_scholar",
        "q": topic,
        "hl": "en",         # Language (English) - adjust as necessary
        "api_key": api_key,
    }
    articles = GoogleSearch(search_params).get_dict().get("organic_results", [])
    if not articles:
        return []

    output_list: List[Dict[str, object]] = []

    # Clamp so a negative limit cannot silently turn into "all but the last N".
    for article in articles[:max(0, max_results)]:
        snippet: str = article.get("snippet", "")

        # The cite endpoint is keyed by result_id; nothing can be fetched without it.
        result_id: str = article.get("result_id", "")
        if not result_id:
            continue

        # 2. Request the citation data for this particular result_id
        cite_params: Dict[str, str] = {
            "engine": "google_scholar_cite",
            "q": result_id,
            "api_key": api_key,
            "citation_format": "bibtex",  # kept from the original request; see NOTE above
        }
        cite_results = GoogleSearch(cite_params).get_dict()

        citations = cite_results.get("citations", [])
        if not citations:
            # No citation data was returned
            continue

        # Un-escape literal "\n" / "\t" sequences in the first citation snippet.
        bibtex_data: str = citations[0].get("snippet", "")
        bibtex_data = bibtex_data.replace("\\n", "\n").replace("\\t", "\t")
        if not bibtex_data.strip():
            continue

        # 3. Fetch the .bib text only for entries that will actually be emitted,
        # and always with the caller's key (not a notebook-level global).
        result = get_bibtex_link_and_content(result_id, api_key)

        # Prefer the URL that produced the content; otherwise read the BibTeX
        # link from the cite response already in hand instead of issuing yet
        # another identical cite request.
        if result:
            bibtex_url = result["url"]
        else:
            bibtex_url = next(
                (
                    link_item.get("link")
                    for link_item in cite_results.get("links", [])
                    if link_item.get("name") == "BibTeX"
                ),
                None,
            )

        output_list.append({
            "abstract": snippet,
            "result_id": result_id,
            "cite_results": cite_results,
            "biburl": bibtex_url,
            "biburl_content": result["content"] if result else "",
            "bibtex": bibtex_data,
        })

    return output_list

## Run

In [5]:
# Read the SerpAPI key from Colab's `userdata` store under the name
# 'SERPAPI_API_KEY' so the key itself never appears in the notebook.
from google.colab import userdata
SERPAPI_API_KEY = userdata.get('SERPAPI_API_KEY')

In [9]:
# Search parameters for this run.
my_api_key = SERPAPI_API_KEY
my_topic = "survey convolutional neural network computer vision"

results = get_scholar_bibtex_list(my_topic, my_api_key, max_results=30)

# Print each record as a numbered block: one labelled line per field,
# followed by a "---" separator.
for position, record in enumerate(results, 1):
    print(f"Citation {position}")
    for label, field in (
        ("Abstract:", "abstract"),
        ("Result ID:", "result_id"),
        ("BibTeX:\n", "bibtex"),
        ("BibTeX URL:", "biburl"),
        ("BibTeX URL Content:\n", "biburl_content"),
    ):
        print(label, record[field])
    print("---")


Citation 1
Abstract: … The convolutional neural network (CNN) is used to construct the majority of computer vision algorithms. A … The contributions of this survey paper are summarized below: …
Result ID: GYz8XewKoRAJ
BibTeX:
 Zhao, Xia, et al. "A review of convolutional neural networks in computer vision." Artificial Intelligence Review 57.4 (2024): 99.
BibTeX URL: https://scholar.googleusercontent.com/scholar.bib?q=info:GYz8XewKoRAJ:scholar.google.com/&output=citation&scisdr=ClH5WuzgGAA:AFWwaeYAAAAAZ8HcahdRAEE2YVHfBR5ZnZDs8kI&scisig=AFWwaeYAAAAAZ8Hcap8XXWRaf8qoNTMy4CzA7cs&scisf=4&ct=citation&cd=-1&hl=en
BibTeX URL Content:
 
---
Citation 2
Abstract: … Deep learning [Citation70] gives an architecture for models with several processing layers … survey works that utilize ConvNet learning techniques to address major computer vision tasks …
Result ID: 34_cSJV2TqUJ
BibTeX:
 Tombe, Ronald, and Serestina Viriri. "Effective processing of convolutional neural networks for computer vision: a tu

## Copy Ready

In [10]:
# Build three copy-ready text blobs from `results`:
#   apa_file - APA-style references, one per entry, blank-line separated
#   bib_file - downloaded .bib entries, blank-line separated
#   texts    - "<abstract> \cite{<key>}" lines, one per entry
# Only entries whose downloaded BibTeX yields a citation key are included.
apa_entries = []
bib_entries = []
cite_lines = []

for item in results:
    bib_content = item["biburl_content"]

    # The citation key is the text between the first "{" and the following ","
    # (e.g. "@article{key, ..."). partition() yields "" instead of raising when
    # the download is empty or contains no "{".
    after_brace = bib_content.partition("{")[2]
    cite_key = after_brace.split("{", 1)[0].split(",", 1)[0].strip()
    if not cite_key:
        continue

    # Select the APA reference by its title rather than trusting its position;
    # fall back to the second entry (the position previously assumed) if no
    # entry is titled "APA".
    citations = item["cite_results"].get("citations", [])
    apa_snippet = next(
        (entry.get("snippet", "") for entry in citations if entry.get("title") == "APA"),
        None,
    )
    if apa_snippet is None and len(citations) > 1:
        apa_snippet = citations[1].get("snippet", "")
    if apa_snippet:
        apa_entries.append(apa_snippet)

    bib_entries.append(bib_content)
    cite_lines.append(f'{item["abstract"]} \\cite{{{cite_key}}}')

# Join once at the end instead of growing strings inside the loop; every entry
# keeps its trailing separator so the printed layout is unchanged.
apa_file = "".join(entry + "\n\n" for entry in apa_entries)
bib_file = "".join(entry + "\n\n" for entry in bib_entries)
texts = "".join(line + "\n" for line in cite_lines)

print(apa_file)
print(bib_file)
print(texts)




