In [23]:
import sys
import os
import requests
from bs4 import BeautifulSoup
from arxiv import Search
import re

def prompt_user():
    """Ask for a search query on standard input and return the text entered."""
    query = input("Enter a search query: ")
    return query

def display_progress(progress):
    """Print the current download progress as a percentage.

    Args:
        progress: Percentage completed. Float values are rounded to one
            decimal place before printing so binary floating-point noise
            (for example ``55.00000000000001``) does not leak into the
            message. Any non-float value is printed unchanged.
    """
    if isinstance(progress, float):
        progress = round(progress, 1)
    print(f"Downloading papers... {progress}%")

def display_results(results):
    """Print each entry of ``results`` on its own line, in iteration order."""
    for line in map(str, results):
        print(line)

def display_error(error):
    """Print ``error`` to standard output, prefixed with ``Error: ``."""
    message = f"Error: {error}"
    print(message)

def display_completion():
    """Print the fixed message announcing that the download run finished."""
    done_message = "Download completed."
    print(done_message)

def search_google_scholar(query, timeout=30):
    """Scrape the Google Scholar results page returned for ``query``.

    Args:
        query: Free-text search string. It is sent through ``params`` so that
            requests URL-encodes it; characters such as ``&``, ``#`` or ``+``
            in the query can no longer corrupt the request URL.
        timeout: Seconds to wait for the HTTP response before requests raises
            a timeout error, instead of blocking indefinitely.

    Returns:
        A list of ``{'title': ..., 'url': ...}`` dicts, one per result entry
        that has both a title heading and a link with an ``href``. Entries
        missing either piece are skipped. The HTTP status is not checked, so
        a page without matching entries simply yields an empty list.
    """
    response = requests.get(
        "https://scholar.google.com/scholar",
        params={"q": query, "hl": "en"},
        timeout=timeout,
    )
    soup = BeautifulSoup(response.content, 'html.parser')
    results = []
    for entry in soup.find_all('div', class_='gs_r gs_or gs_scl'):
        heading = entry.find('h3', class_='gs_rt')
        link = entry.find('a')
        # Skip incomplete entries explicitly rather than hiding every possible
        # exception behind a bare ``except``.
        if heading is None or link is None:
            continue
        href = link.get('href')
        if href is None:
            continue
        results.append({'title': heading.text, 'url': href})
    return results

def search_arxiv(query, max_results=10):
    """Query arXiv and return the title and PDF link of each hit.

    Args:
        query: Search string handed to ``arxiv.Search``.
        max_results: Upper bound on the number of results requested.
            Defaults to 10, the value that was previously hard-coded.

    Returns:
        A list of ``{'title': ..., 'url': ...}`` dicts where ``url`` is the
        result's ``pdf_url``.
    """
    # NOTE(review): newer releases of the arxiv package appear to deprecate
    # Search.results() in favour of Client().results(search) -- confirm
    # against the installed version before upgrading.
    search = Search(query=query, max_results=max_results)
    return [
        {'title': paper.title, 'url': paper.pdf_url}
        for paper in search.results()
    ]

def combine_metadata(google_scholar_metadata, arxiv_metadata):
    """Concatenate both metadata sequences, Google Scholar entries first."""
    merged = google_scholar_metadata + arxiv_metadata
    return merged

def sanitize_filename(filename):
    """Reduce ``filename`` to word characters, hyphens and underscores.

    Characters that are neither word characters, whitespace nor ``-`` are
    dropped first; every remaining run of whitespace is then collapsed into
    a single underscore.
    """
    without_specials = re.sub(r"[^\w\s-]", "", filename)
    return re.sub(r"\s+", "_", without_specials)

def download_paper(url, filename, timeout=60):
    """Download ``url`` and store the response body under ``./papers``.

    Args:
        url: Address to fetch.
        filename: Desired base name of the file. A trailing ``.pdf`` is
            removed before sanitising, because the sanitiser strips the dot
            and the extension is appended again below (which would otherwise
            produce names such as ``Titlepdf.pdf``).
        timeout: Seconds to wait for the HTTP response before requests raises
            a timeout error, instead of blocking indefinitely.

    Nothing is written when the server does not answer with status 200; the
    failure is reported through ``display_error``. Network and file-system
    exceptions are left to propagate to the caller.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        display_error(f"HTTP {response.status_code} while downloading {url}")
        return

    stem = filename[:-4] if filename.lower().endswith(".pdf") else filename
    # Fall back to a fixed name when sanitising leaves nothing usable.
    sanitized_filename = sanitize_filename(stem) or "untitled"

    # The target directory is not guaranteed to exist on a fresh checkout.
    os.makedirs("./papers", exist_ok=True)
    with open(f"./papers/{sanitized_filename}.pdf", 'wb') as file:
        file.write(response.content)

def download_papers(metadata):
    """Download every paper in ``metadata`` and report progress after each.

    Args:
        metadata: Sized sequence of dicts carrying a ``'title'`` and a
            ``'url'`` key.

    A paper whose download raises a network or file-system error is reported
    through ``display_error`` and skipped, so one bad link no longer aborts
    the rest of the batch.
    """
    total_papers = len(metadata)
    for i, paper in enumerate(metadata, 1):
        title = paper['title']
        url = paper['url']
        try:
            # download_paper appends ".pdf" itself, so pass the bare title;
            # adding the extension here too yields names like "Titlepdf.pdf".
            download_paper(url, title)
        except (requests.RequestException, OSError) as exc:
            display_error(f"could not download {url}: {exc}")
        # Multiply before dividing and round to one decimal so the printed
        # percentage is free of floating-point noise.
        progress = round(i * 100 / total_papers, 1)
        display_progress(progress)
    display_completion()

def main():
    """Run the whole pipeline: prompt, search both sources, download."""
    query = prompt_user()
    # Google Scholar is queried before arXiv, and its results come first.
    papers = combine_metadata(
        search_google_scholar(query),
        search_arxiv(query),
    )
    download_papers(papers)


# Only start the pipeline when this code runs as the top-level module.
if __name__ == "__main__":
    main()

Downloading papers... 5.0%
Downloading papers... 10.0%
Downloading papers... 15.0%
Downloading papers... 20.0%
Downloading papers... 25.0%
Downloading papers... 30.0%
Downloading papers... 35.0%
Downloading papers... 40.0%
Downloading papers... 45.0%
Downloading papers... 50.0%
Downloading papers... 55.00000000000001%
Downloading papers... 60.0%
Downloading papers... 65.0%
Downloading papers... 70.0%
Downloading papers... 75.0%
Downloading papers... 80.0%
Downloading papers... 85.0%
Downloading papers... 90.0%
Downloading papers... 95.0%
Downloading papers... 100.0%
Download completed.


In [20]:
# Run the search steps one by one (the same calls main() makes) so that the
# intermediate metadata lists remain available as notebook variables.
search_query = prompt_user()
google_scholar_metadata = search_google_scholar(search_query)
arxiv_metadata = search_arxiv(search_query)
combined_metadata = combine_metadata(google_scholar_metadata, arxiv_metadata)
# The download step is commented out, so this cell only gathers metadata:
# download_papers(combined_metadata)

In [21]:
# Display the list returned by search_google_scholar for the current query.
google_scholar_metadata

[{'title': 'Raman spectroscopy for cancer detection: a review',
  'url': 'https://ieeexplore.ieee.org/abstract/document/756895/'},
 {'title': 'Raman spectroscopy for cancer detection and cancer surgery guidance: translation to the clinics',
  'url': 'https://pubs.rsc.org/en/content/articlehtml/2017/an/c7an00957g'},
 {'title': '[HTML][HTML] Raman spectroscopy for medical diagnostics—From in-vitro biofluid assays to in-vivo cancer detection',
  'url': 'https://www.sciencedirect.com/science/article/pii/S0169409X15000447'},
 {'title': 'Intraoperative brain cancer detection with Raman spectroscopy in humans',
  'url': 'https://www.researchgate.net/profile/Frederic-Leblond/publication/273777320_Intraoperative_brain_cancer_detection_with_Raman_spectroscopy_in_humans/links/55109b150cf20352196c2477/Intraoperative-brain-cancer-detection-with-Raman-spectroscopy-in-humans.pdf'},
 {'title': 'Resonance Raman and Raman spectroscopy for breast cancer detection',
  'url': 'https://journals.sagepub.com/

In [22]:
# Display the list returned by search_arxiv for the current query.
arxiv_metadata

[{'title': 'Machine Learning Characterization of Cancer Patients-Derived Extracellular Vesicles using Vibrational Spectroscopies',
  'url': 'http://arxiv.org/pdf/2107.10332v9'},
 {'title': 'Applications of Raman Spectroscopy in Clinical Medicine',
  'url': 'http://arxiv.org/pdf/2304.07740v1'},
 {'title': 'Feature Fusion of Raman Chemical Imaging and Digital Histopathology using Machine Learning for Prostate Cancer Detection',
  'url': 'http://arxiv.org/pdf/2101.07342v1'},
 {'title': 'Nonscanning large-area Raman imaging for ex vivo/in vivo skin cancer discrimination',
  'url': 'http://arxiv.org/pdf/1810.03363v1'},
 {'title': 'Integrated Photodynamic Raman Theranostics for Cancer Diagnosis, Treatment, and Post-Treatment Molecular Monitoring',
  'url': 'http://arxiv.org/pdf/2009.04222v1'},
 {'title': 'Malignancy Induced Subtle Perturbation Sensitive Raman Scattering for Glioma Detection and Grading',
  'url': 'http://arxiv.org/pdf/2005.02638v1'},
 {'title': 'Fast stimulated Raman imaging