In [1]:
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
from IPython.display import display

In [2]:
# Site root; used to turn the hrefs found on result pages into absolute URLs.
base_url = 'https://www.sciencedirect.com'

# Search-results URL, one query parameter per line. The string deliberately
# ends with "offset=" so that a numeric result offset can be appended to it.
search_url = (
    'https://www.sciencedirect.com/search'
    '?qs=migraine%20AND%20acupuncture'           # query: "migraine AND acupuncture"
    '&show=100'                                  # 100 results per page
    '&sortBy=relevance'
    '&articleTypes=REV%2CFLA%2CABS%2CCRP%2CCNF'  # comma-separated (%2C) article-type codes
    '&offset='
)

def _strip_trailing_separator(text):
    """Return `text` without a trailing ', ' separator, if it has one."""
    return text[:-2] if text.endswith(', ') else text


def scrape(url, timeout=30):
    """Fetch one search-result page and extract one record per result item.

    Parameters
    ----------
    url : str
        Full URL of the search-result page to download.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout `requests.get` can block indefinitely.

    Returns
    -------
    list of dict
        One dict per element matching '.ResultItem.col-xs-24.push-m'.
        'open_access' (bool) is always present; 'title', 'title_url',
        'research_type', 'journal', 'author' and 'publication_link' are only
        present when the corresponding element (and href) exists in the item.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status, so that an error page is
        not silently parsed as "zero results".
    requests.RequestException
        On connection failures or when `timeout` is exceeded.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    all_data = []

    for result_item in soup.select('.ResultItem.col-xs-24.push-m'):
        data = {}

        title_item = result_item.select_one('a.result-list-title-link')
        if title_item:
            data['title'] = title_item.text
            title_href = title_item.attrs.get('href')
            if title_href:
                # urljoin copes with relative and already-absolute hrefs alike
                data['title_url'] = urljoin(base_url, title_href)

        research_type_item = result_item.select_one('.article-type')
        if research_type_item:
            data['research_type'] = research_type_item.text

        journal_item = result_item.select_one('.SubType')
        if journal_item:
            data['journal'] = _strip_trailing_separator(journal_item.text)

        author_item = result_item.select_one('.Authors')
        if author_item:
            data['author'] = _strip_trailing_separator(author_item.text)

        # The mere presence of an '.open-access' element marks the item as open access
        open_access_item = result_item.select_one('.open-access')
        data['open_access'] = open_access_item is not None

        publication_link_item = result_item.select_one('.subtype-srctitle-link')
        if publication_link_item:
            publication_href = publication_link_item.attrs.get('href')
            if publication_href:
                data['publication_link'] = urljoin(base_url, publication_href)

        all_data.append(data)

    return all_data


In [3]:
# Walk the paginated search results: 15 pages, the offset advancing by 100 per page
data = []
for n in range(15):
    url = '{}{}'.format(search_url, n * 100)
    print('Processing ', url)
    data.extend(scrape(url))

# One row per scraped result; show a preview followed by the table dimensions
df = pd.DataFrame(data)
display(df.head())
df.shape

Processing  https://www.sciencedirect.com/search?qs=migraine%20AND%20acupuncture&show=100&sortBy=relevance&articleTypes=REV%2CFLA%2CABS%2CCRP%2CCNF&offset=0
Processing  https://www.sciencedirect.com/search?qs=migraine%20AND%20acupuncture&show=100&sortBy=relevance&articleTypes=REV%2CFLA%2CABS%2CCRP%2CCNF&offset=100
Processing  https://www.sciencedirect.com/search?qs=migraine%20AND%20acupuncture&show=100&sortBy=relevance&articleTypes=REV%2CFLA%2CABS%2CCRP%2CCNF&offset=200
Processing  https://www.sciencedirect.com/search?qs=migraine%20AND%20acupuncture&show=100&sortBy=relevance&articleTypes=REV%2CFLA%2CABS%2CCRP%2CCNF&offset=300
Processing  https://www.sciencedirect.com/search?qs=migraine%20AND%20acupuncture&show=100&sortBy=relevance&articleTypes=REV%2CFLA%2CABS%2CCRP%2CCNF&offset=400
Processing  https://www.sciencedirect.com/search?qs=migraine%20AND%20acupuncture&show=100&sortBy=relevance&articleTypes=REV%2CFLA%2CABS%2CCRP%2CCNF&offset=500
Processing  https://www.sciencedirect.com/search

Unnamed: 0,author,journal,open_access,publication_link,research_type,title,title_url
0,"Jia Xu, Fu-qing Zhang, Jian Pei, Jun Ji","Journal of Integrative Medicine, In press, acc...",False,https://www.sciencedirect.com/science/journal/...,Review article,Acupuncture for migraine without aura: a syste...,https://www.sciencedirect.com/science/article/...
1,"Xianmin Yu, Alan Salmoni","Journal of Acupuncture and Meridian Studies, I...",True,https://www.sciencedirect.com/science/journal/...,Research article,Comparison of the Prophylactic Effect Between ...,https://www.sciencedirect.com/science/article/...
2,"Zhengjie Li, Fang Zeng, Tao Yin, Lei Lan, Jian...","NeuroImage: Clinical, Volume 15, 2017, Pages 3...",True,https://www.sciencedirect.com/science/journal/...,Research article,Acupuncture modulates the abnormal brainstem a...,https://www.sciencedirect.com/science/article/...
3,Yılmaz Sezgin,"Journal of Acupuncture and Meridian Studies, V...",True,https://www.sciencedirect.com/science/journal/...,Case report,The Acupuncture Therapeutic Approach in Tempor...,https://www.sciencedirect.com/science/article/...
4,"Jia-yuan NIU, Gang-qi FAN","World Journal of Acupuncture - Moxibustion, Vo...",False,https://www.sciencedirect.com/science/journal/...,Research article,Specific effect of acupuncture on the neural p...,https://www.sciencedirect.com/science/article/...


(1463, 7)

In [4]:
# Keep only the columns listed below, in this order.
# NOTE(review): 'research_type' is not in the list, so that scraped column is
# dropped from df at this point -- confirm this is intended.
columns = [
    'author',
    'title',
    'title_url',
    'journal',
    'publication_link',
    'open_access',
]
df = df.loc[:, columns]

In [5]:
# Write the result table to an Excel workbook; index=False omits the row-index column
df.to_excel(excel_writer='sciencedirect_result.xlsx', index=False)