# Web Scraping using Python

## Target Journal: JMRI
- https://onlinelibrary.wiley.com/toc/15222586/2022/55/5
- https://onlinelibrary.wiley.com/toc/15222586/2022/55/4
- https://onlinelibrary.wiley.com/toc/15222586/2022/55/3
- https://onlinelibrary.wiley.com/toc/15222586/2022/55/2
- https://onlinelibrary.wiley.com/toc/15222586/2022/55/1

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#To easily display the plots, make sure to include the line %matplotlib inline as shown below.
%matplotlib inline

import urllib
import requests
import lxml

#To perform web scraping, you should also import the libraries shown below. 
#The urllib.request module is used to open URLs. 
#The Beautiful Soup package is used to extract data from html files. 
#The Beautiful Soup library's name is bs4 which stands for Beautiful Soup, version 4.
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup

## First, save as a local html file
 - See: https://zetcode.com/python/beautifulsoup/

In [2]:
# Mount Google Drive at /content/drive; the saved journal pages read below live under it.
# google.colab is the Colab runtime's helper module, so this cell is Colab-specific.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Select the issue to process by editing ISSUE_NO instead of commenting paths
# in and out (the previously listed paths covered Vol 55, No 1 through No 5).
RAW_DIR = '/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/rawdata'
VOLUME_NO = 55
ISSUE_NO = 5

# Despite its name, URL is the filesystem path of a locally saved copy of the
# issue's table-of-contents page, not a web address.
URL = f'{RAW_DIR}/Journal of Magnetic Resonance Imaging_ Vol {VOLUME_NO}, No {ISSUE_NO}.html'

# Read with an explicit encoding so decoding does not depend on the runtime's
# locale default (titles contain non-ASCII characters such as curly quotes).
with open(URL, 'r', encoding='utf-8') as f:
    contents = f.read()

# Parsing does not need the file handle, so it happens after the file is closed.
soup = BeautifulSoup(contents, 'lxml')

Parse the saved page, then inspect its title, text, and links.

In [4]:
def safeOpenParsePage(targetUrl):
    """Read a locally saved HTML page and parse it with BeautifulSoup (lxml).

    Args:
        targetUrl: Filesystem path of the saved HTML file. Despite the
            parameter name, this is opened with ``open()``, not fetched.

    Returns:
        The parsed ``BeautifulSoup`` document, or ``None`` when the file
        cannot be opened or read (the error is printed in that case).
    """
    try:
        # The context manager guarantees the handle is closed, even if
        # read() fails part-way through.
        with open(targetUrl, 'r', encoding='utf-8') as page_file:
            raw_html = page_file.read()
    except OSError as e:
        # open()/read() report missing or unreadable files as OSError.
        # urllib.error.HTTPError is itself an OSError subclass, so anything
        # the previous handler caught is still caught here.
        print(e)
        return None
    return BeautifulSoup(raw_html, 'lxml')

# Parse the selected issue page; safeOpenParsePage returns None on failure.
soupJMRI = safeOpenParsePage(URL)
if soupJMRI is not None:
    # Preview only the beginning of the document: the full prettified page is
    # very large and would bury the rest of the notebook in output.
    print(soupJMRI.prettify()[:2000])

<!DOCTYPE html>
<html class="pb-page" data-request-id="d1cd10b2-9f5e-4b85-ac63-d8622867835b" lang="en">
 <head data-pb-dropzone="head">
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <meta content=";journal:journal:15222586;ctype:string:Journal Content;website:website:pericles;page:string:Table of Contents;requestedJournal:journal:15222586;wgroup:string:Publication Websites;pageGroup:string:Publication Pages;issue:issue:doi\:10.1002/jmri.v55.5" name="pbContext"/>
  <script type="text/javascript">
   var $DoubleClickZone = "j-magres-imaging_jmri";var $DoubleClickSite =  "wly.radiol.imag_000105";
  </script>
  <script id="analyticDigitalData">
   digitalData = {"site":{"ip":"152.78.0.24","environment":"LIVE","website":"onlinelibrary.wiley.com","websiteCode":"pericles","serverDate":"2022-05-05"},"identities":[{"type":"BasicGroup","uuid":"e623deab-c83b-41e3-84a2-d9f1d46c5f17"},{"type":"ReferrerUser","uuid":"5a2c1d70-1255-4a45-95f3-a3aa3a2771fb"},{"type":"InstitutionUser","uuid"

In [5]:
# Get the page's <title> element. Note that `title` is a BeautifulSoup Tag
# (it prints with its <title>...</title> markup), not a plain string.
title = soupJMRI.title
print(title)

<title> Journal of Magnetic Resonance Imaging: Vol 55, No 5</title>


In [6]:
# Extract the text content of the parsed page and print it.
# The value printed is the `text` just extracted from soupJMRI, so this cell
# no longer depends on the separate `soup` object created in an earlier cell.
text = soupJMRI.get_text()
print(text)

# Another way to extract text
# str(all_links[103]).split("<h2>")[1].replace("</h2></a>", "")



var $DoubleClickZone = "j-magres-imaging_jmri";var $DoubleClickSite =  "wly.radiol.imag_000105";digitalData = {"site":{"ip":"152.78.0.24","environment":"LIVE","website":"onlinelibrary.wiley.com","websiteCode":"pericles","serverDate":"2022-05-05"},"identities":[{"type":"BasicGroup","uuid":"e623deab-c83b-41e3-84a2-d9f1d46c5f17"},{"type":"ReferrerUser","uuid":"5a2c1d70-1255-4a45-95f3-a3aa3a2771fb"},{"type":"InstitutionUser","uuid":"58cbd8e6-3c5d-4fc0-ac26-57c4792d2c4b","customerRecords":[{"customerDomain":"ALM-CU","customerNumber":"EALWB000204"}]},{"type":"SmartGroupUser","uuid":"cf9c85b9-7f3b-4309-aeaa-c41669cabe3b"},{"type":"SmartGroupUser","uuid":"db8b8d10-1aed-4809-b3ee-70938c569014"},{"type":"SmartGroupUser","uuid":"cdce41c2-a8de-4a64-a4d9-a85caaaa2314"},{"type":"SmartGroupUser","uuid":"cb1790a9-a0b9-40b3-915d-806df593aeeb"},{"type":"InstitutionUser","uuid":"dd40e26c-3e96-4e9f-9382-5d13a2a401f3","customerRecords":[{"customerDomain":"ALM-CU","customerNumber":"EAL57933332"}]},{"type"

In [7]:
# Collect every anchor (<a>) element on the page; the DOI list is built from these later.
all_links = soupJMRI.find_all('a')

### Explore Category

In [8]:
# <h3> elements: judging by the printed output, these hold the issue's section
# headings (e.g. "Review Articles", "Editorial") along with site navigation headings.
all_h3 = soupJMRI.find_all("h3")

# Print each heading's text, then how many <h3> elements were found.
for heading in all_h3:
    print(heading.get_text())
print(len(all_h3))

Menu
Format
Type of import
Cover Image
Issue Information
Review Articles
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
About Wiley Online Library
Help & Support
Opportunities
Connect with Wiley
36


### Explore Original Research Subcategory

In [9]:
# <h4> elements: judging by the printed output, these are the subcategory
# labels of the research articles (e.g. "Neuro", "Cardiac", "Abdomen").
all_h4 = soupJMRI.find_all("h4")

# Print each label's text, then how many <h4> elements were found.
for heading in all_h4:
    print(heading.get_text())
print(len(all_h4))

Neuro
Cardiac
Safety
Technical
Musculoskeletal
Chest
Abdomen
Abdomen
Abdomen
Vascular
Cardiac
Cardiac
Cardiac
Head and Neck
Breast
Musculoskeletal
Neuro
Neuro
18


In [10]:
# Article titles are taken from <h2> elements; headings of three words or
# fewer are skipped.
all_h2 = soupJMRI.find_all("h2")

n_long_headings = 0
for heading in all_h2:
    heading_text = heading.get_text()
    if len(heading_text.split()) > 3:
        print(heading_text)
        n_long_headings += 1
print(n_long_headings)

# str_h2 = str(all_h2)

# links = soupJMRI.find_all("a")
# str_links = str(links)
# print(str_h2)

# cleantext = BeautifulSoup(str_h2, "lxml").get_text()
# cleantext = BeautifulSoup(str_links, "lxml").get_text()
# print(cleantext)

A Comparative Study of Amide Proton Transfer Weighted Imaging and Intravoxel Incoherent Motion MRI Techniques Versus (18) F-FDG PET to Distinguish Solitary Pulmonary Lesions and Their Subtypes
Magnetic Resonance Iron Imaging in Amyotrophic Lateral Sclerosis
Four-Dimensional Flow Magnetic Resonance Imaging in the Assessment of Blood Flow in the Heart and Great Vessels: A Systematic Review
Magnetic Resonance Imaging During a Pandemic: Recommendations by the ISMRM Safety Committee
Compressed Sensing in Sodium Magnetic Resonance Imaging: Techniques, Applications, and Future Prospects
An Update in Imaging Evaluation of Histopathological Grade of Soft Tissue Sarcomas Using Structural and Quantitative Imaging and Radiomics
A Comparative Study of Amide Proton Transfer Weighted Imaging and Intravoxel Incoherent Motion MRI Techniques Versus (18) F-FDG PET to Distinguish Solitary Pulmonary Lesions and Their Subtypes
Editorial for “A Comparative Study of Amide Proton Transfer Weighted Imaging (APT

## Next, clean up the title list

In [11]:
# Keep the text of every <h2> heading longer than three words (candidate titles).
list_h2 = [h2.get_text() for h2 in all_h2 if len(h2.get_text().split()) > 3]

# Delete the last element: "Log in to Wiley Online Library"
list_h2.pop()

# Insert "Issue Information" as the 2nd element
list_h2.insert(1, "Issue Information")

# Manual adjustments that were used for other issues (kept for reference):
# JMRI Vol 54 No 2
# list_h2.append("Erratum")

# JMRI Vol 54 No 3
# list_h2.insert(2, "Commentary")
# list_h2.append("Condensation Artifact")
# list_h2.append("Reviewer Acknowledgements")

# JMRI Vol 54 No 6
# list_h2.insert(2, "ISMRM Young Investigator Award Winners")

# Drop two more trailing entries (originally annotated "JMRI Vol 55 No 1 Pop out a few more")
del list_h2[-2:]

# Show the cleaned titles followed by their count.
for heading_text in list_h2:
    print(heading_text)
print(len(list_h2))

A Comparative Study of Amide Proton Transfer Weighted Imaging and Intravoxel Incoherent Motion MRI Techniques Versus (18) F-FDG PET to Distinguish Solitary Pulmonary Lesions and Their Subtypes
Issue Information
Magnetic Resonance Iron Imaging in Amyotrophic Lateral Sclerosis
Four-Dimensional Flow Magnetic Resonance Imaging in the Assessment of Blood Flow in the Heart and Great Vessels: A Systematic Review
Magnetic Resonance Imaging During a Pandemic: Recommendations by the ISMRM Safety Committee
Compressed Sensing in Sodium Magnetic Resonance Imaging: Techniques, Applications, and Future Prospects
An Update in Imaging Evaluation of Histopathological Grade of Soft Tissue Sarcomas Using Structural and Quantitative Imaging and Radiomics
A Comparative Study of Amide Proton Transfer Weighted Imaging and Intravoxel Incoherent Motion MRI Techniques Versus (18) F-FDG PET to Distinguish Solitary Pulmonary Lesions and Their Subtypes
Editorial for “A Comparative Study of Amide Proton Transfer Wei

### Add dates first published

In [12]:
# "First Published" dates are read from <li> elements whose class list is
# exactly ['ePubDate'].
all_li = soupJMRI.find_all("li")

# Each matching item's text is split on ': ' and the second piece is kept.
list_date = [
    item.get_text().split(': ')[1]
    for item in all_li
    if item.get_attribute_list("class") == ['ePubDate']
]

# Show the dates followed by their count.
for date_text in list_date:
    print(date_text)
print(len(list_date))

15 April 2022
15 April 2022
15 February 2021
20 August 2021
20 December 2021
17 December 2021
12 October 2021
01 November 2021
10 November 2021
09 September 2021
01 October 2021
20 September 2021
14 September 2021
23 September 2021
16 September 2021
01 October 2021
24 September 2021
29 September 2021
10 August 2021
06 August 2021
22 September 2021
06 November 2021
21 June 2021
23 June 2021
22 September 2021
15 October 2021
03 October 2021
20 October 2021
14 October 2021
19 October 2021
14 October 2021
29 November 2021
22 October 2021
28 September 2021
15 October 2021
30 September 2021
01 October 2021
18 September 2021
38


In [13]:
# Build the DOI links from anchors whose class attribute mentions 'visitable':
# the '/doi' part of each href is replaced with 'https://doi.org'.
list_url = [
    anchor.get("href").replace('/doi', 'https://doi.org')
    for anchor in all_links
    if 'visitable' in str(anchor.get("class"))
]

# Show the links followed by their count.
for doi_url in list_url:
    print(doi_url)
print(len(list_url))

https://doi.org/10.1002/jmri.27712
https://doi.org/10.1002/jmri.27713
https://doi.org/10.1002/jmri.27530
https://doi.org/10.1002/jmri.27874
https://doi.org/10.1002/jmri.28006
https://doi.org/10.1002/jmri.28029
https://doi.org/10.1002/jmri.27954
https://doi.org/10.1002/jmri.27977
https://doi.org/10.1002/jmri.27987
https://doi.org/10.1002/jmri.27917
https://doi.org/10.1002/jmri.27947
https://doi.org/10.1002/jmri.27921
https://doi.org/10.1002/jmri.27904
https://doi.org/10.1002/jmri.27928
https://doi.org/10.1002/jmri.27667
https://doi.org/10.1002/jmri.27938
https://doi.org/10.1002/jmri.27935
https://doi.org/10.1002/jmri.27944
https://doi.org/10.1002/jmri.27881
https://doi.org/10.1002/jmri.27880
https://doi.org/10.1002/jmri.27931
https://doi.org/10.1002/jmri.27973
https://doi.org/10.1002/jmri.27791
https://doi.org/10.1002/jmri.27799
https://doi.org/10.1002/jmri.27930
https://doi.org/10.1002/jmri.27939
https://doi.org/10.1002/jmri.27941
https://doi.org/10.1002/jmri.27955
https://doi.org/10.1

In [14]:
# Assemble one row per article; the three lists must have the same length.
# `title` is a BeautifulSoup Tag, not a string, so pass its text instead: a
# plain string is broadcast to every row as a scalar and is written to
# CSV/Excel as text. strip=True also removes the leading space seen in the
# page's <title>.
# The 'Category' column is not built here; it is inserted in a later cell.
df = pd.DataFrame({'Journal': title.get_text(strip=True),
                   'Title': list_h2,
                   'First Published': list_date,
                   'DOI': list_url})
df

Unnamed: 0,Journal,Title,First Published,DOI
0,Journal of Magnetic Resonance Imaging: Vol 55...,A Comparative Study of Amide Proton Transfer W...,15 April 2022,https://doi.org/10.1002/jmri.27712
1,Journal of Magnetic Resonance Imaging: Vol 55...,Issue Information,15 April 2022,https://doi.org/10.1002/jmri.27713
2,Journal of Magnetic Resonance Imaging: Vol 55...,Magnetic Resonance Iron Imaging in Amyotrophic...,15 February 2021,https://doi.org/10.1002/jmri.27530
3,Journal of Magnetic Resonance Imaging: Vol 55...,Four-Dimensional Flow Magnetic Resonance Imagi...,20 August 2021,https://doi.org/10.1002/jmri.27874
4,Journal of Magnetic Resonance Imaging: Vol 55...,Magnetic Resonance Imaging During a Pandemic: ...,20 December 2021,https://doi.org/10.1002/jmri.28006
5,Journal of Magnetic Resonance Imaging: Vol 55...,Compressed Sensing in Sodium Magnetic Resonanc...,17 December 2021,https://doi.org/10.1002/jmri.28029
6,Journal of Magnetic Resonance Imaging: Vol 55...,An Update in Imaging Evaluation of Histopathol...,12 October 2021,https://doi.org/10.1002/jmri.27954
7,Journal of Magnetic Resonance Imaging: Vol 55...,A Comparative Study of Amide Proton Transfer W...,01 November 2021,https://doi.org/10.1002/jmri.27977
8,Journal of Magnetic Resonance Imaging: Vol 55...,Editorial for “A Comparative Study of Amide Pr...,10 November 2021,https://doi.org/10.1002/jmri.27987
9,Journal of Magnetic Resonance Imaging: Vol 55...,Perfusion Analysis of Kidney Injury in Rats Wi...,09 September 2021,https://doi.org/10.1002/jmri.27917


## Third, manually create the list of categories based on the Issue Information
 - See [here](https://stackoverflow.com/questions/4654414/python-append-item-to-list-n-times) for how to repeat elements in a list N times

In [15]:
# list_category = ['Cover Image', 'Issue Information', 'Commentary']

### Vol 55 No 1 ###
# list_category = ['Cover Image', 'Issue Information']
# list_category.extend(['JMRI-ISMRM Recommendation'] * 1)
# list_category.extend(['Review Articles'] * 5)
# list_category.extend(['Original Research: Abdomen'] * 2)
# list_category.extend(['Original Research: Abdomen', 'Editorial'] * 1)
# list_category.extend(['Original Research: Neuro', 'Editorial'] * 4)
# list_category.extend(['Original Research: Cardiac', 'Editorial'] * 3)
# list_category.extend(['Original Research: Musculoskeletal'] * 1)
# list_category.extend(['Original Research: Musculoskeletal', 'Editorial'] * 1)
# list_category.extend(['Original Research: Vascular'] * 1)
# list_category.extend(['Original Research: Pelvis'] * 2)
# list_category.extend(['Original Research: Pelvis', 'Editorial'] * 1)
# list_category.extend(['Original Research: Technical'] * 1)
# list_category.extend(['Original Research: Technical', 'Editorial'] * 1)

### Vol 55 No 2 ###
# list_category = ['Cover Image', 'Issue Information']
# list_category.extend(['JMRI-ISMRM Recommendation'] * 2)
# list_category.extend(['Review Articles'] * 5)
# list_category.extend(['Original Research: Head and Neck'] * 1)
# list_category.extend(['Original Research: Neuro'] * 3)
# list_category.extend(['Original Research: Neuro', 'Editorial'] * 1)
# list_category.extend(['Original Research: Pelvis', 'Editorial'] * 2)
# list_category.extend(['Original Research: Abdomen'] * 2)
# list_category.extend(['Original Research: Abdomen', 'Editorial'] * 2)
# list_category.extend(['Original Research: Pediatrics', 'Editorial'] * 1)
# list_category.extend(['Original Research: Cardiac', 'Editorial'] * 3)
# list_category.extend(['Original Research: Musculoskeletal', 'Editorial'] * 2)
# list_category.extend(['Letter to the Editor'] * 1)

### Vol 55 No 3 ###
# list_category = ['Cover Image', 'Issue Information']
# list_category.extend(['CME Article'] * 1)
# list_category.extend(['Review Articles'] * 5)
# list_category.extend(['Original Research: Breast', 'Editorial'] * 1)
# list_category.extend(['Original Research: Abdomen'] * 3)
# list_category.extend(['Original Research: Abdomen', 'Editorial'] * 1)
# list_category.extend(['Original Research: Technical'] * 1)
# list_category.extend(['Original Research: Cardiac'] * 1)
# list_category.extend(['Original Research: Cardiac', 'Editorial'] * 2)
# list_category.extend(['Original Research: Musculoskeletal'] * 1)
# list_category.extend(['Original Research: Neuro'] * 1)
# list_category.extend(['Original Research: Neuro', 'Editorial'] * 2)
# list_category.extend(['Original Research: Vascular', 'Editorial'] * 1)

### Vol 55 No 4 ###
# list_category = ['Cover Image', 'Issue Information']
# list_category.extend(['CME Article'] * 1)
# list_category.extend(['Review Articles'] * 5)
# list_category.extend(['Original Research: Head and Neck', 'Editorial'] * 2)
# list_category.extend(['Original Research: Cardiac'] * 1)
# list_category.extend(['Original Research: Cardiac', 'Editorial'] * 1)
# list_category.extend(['Original Research: Whole Body'] * 1)
# list_category.extend(['Original Research: Neuro'] * 2)
# list_category.extend(['Original Research: Neuro', 'Editorial'] * 3)
# list_category.extend(['Original Research: Chest', 'Editorial'] * 1)
# list_category.extend(['Original Research: Musculoskeletal'] * 1)
# list_category.extend(['Original Research: Abdomen'] * 4)
# list_category.extend(['Original Research: Vascular', 'Editorial'] * 1)

### Vol 55 No 5 ###
# Category labels for Vol 55 No 5, in table-of-contents order.
# Each entry is (labels, repeat): the label group is appended `repeat` times,
# so (['X', 'Editorial'], 2) yields X, Editorial, X, Editorial.
category_runs = [
    (['Review Articles'], 5),
    (['Original Research: Chest', 'Editorial'], 1),
    (['Original Research: Abdomen'], 2),
    (['Original Research: Abdomen', 'Editorial'], 2),
    (['Original Research: Vascular'], 1),
    (['Original Research: Vascular', 'Editorial'], 1),
    (['Original Research: Cardiac', 'Editorial'], 3),
    (['Original Research: Head and Neck'], 1),
    (['Original Research: Head and Neck', 'Editorial'], 1),
    (['Original Research: Breast', 'Editorial'], 1),
    (['Original Research: Musculoskeletal', 'Editorial'], 1),
    (['Editorial'], 1),
    (['Original Research: Neuro'], 1),
    (['Original Research: Neuro', 'Editorial'], 2),
    (['Editorial'], 1),
]

list_category = ['Cover Image', 'Issue Information']
for labels, repeat in category_runs:
    list_category += labels * repeat

# Show the labels followed by their count.
for category in list_category:
    print(category)
print(len(list_category))

Cover Image
Issue Information
Review Articles
Review Articles
Review Articles
Review Articles
Review Articles
Original Research: Chest
Editorial
Original Research: Abdomen
Original Research: Abdomen
Original Research: Abdomen
Editorial
Original Research: Abdomen
Editorial
Original Research: Vascular
Original Research: Vascular
Editorial
Original Research: Cardiac
Editorial
Original Research: Cardiac
Editorial
Original Research: Cardiac
Editorial
Original Research: Head and Neck
Original Research: Head and Neck
Editorial
Original Research: Breast
Editorial
Original Research: Musculoskeletal
Editorial
Editorial
Original Research: Neuro
Original Research: Neuro
Editorial
Original Research: Neuro
Editorial
Editorial
38


In [16]:
# Place the manually built category labels right after the 'Journal' column.
# DataFrame.insert raises ValueError when the column already exists, so drop a
# 'Category' column left by a previous run first; this keeps the cell re-runnable.
if 'Category' in df.columns:
    df = df.drop(columns='Category')
df.insert(1, 'Category', list_category)
df

Unnamed: 0,Journal,Category,Title,First Published,DOI
0,Journal of Magnetic Resonance Imaging: Vol 55...,Cover Image,A Comparative Study of Amide Proton Transfer W...,15 April 2022,https://doi.org/10.1002/jmri.27712
1,Journal of Magnetic Resonance Imaging: Vol 55...,Issue Information,Issue Information,15 April 2022,https://doi.org/10.1002/jmri.27713
2,Journal of Magnetic Resonance Imaging: Vol 55...,Review Articles,Magnetic Resonance Iron Imaging in Amyotrophic...,15 February 2021,https://doi.org/10.1002/jmri.27530
3,Journal of Magnetic Resonance Imaging: Vol 55...,Review Articles,Four-Dimensional Flow Magnetic Resonance Imagi...,20 August 2021,https://doi.org/10.1002/jmri.27874
4,Journal of Magnetic Resonance Imaging: Vol 55...,Review Articles,Magnetic Resonance Imaging During a Pandemic: ...,20 December 2021,https://doi.org/10.1002/jmri.28006
5,Journal of Magnetic Resonance Imaging: Vol 55...,Review Articles,Compressed Sensing in Sodium Magnetic Resonanc...,17 December 2021,https://doi.org/10.1002/jmri.28029
6,Journal of Magnetic Resonance Imaging: Vol 55...,Review Articles,An Update in Imaging Evaluation of Histopathol...,12 October 2021,https://doi.org/10.1002/jmri.27954
7,Journal of Magnetic Resonance Imaging: Vol 55...,Original Research: Chest,A Comparative Study of Amide Proton Transfer W...,01 November 2021,https://doi.org/10.1002/jmri.27977
8,Journal of Magnetic Resonance Imaging: Vol 55...,Editorial,Editorial for “A Comparative Study of Amide Pr...,10 November 2021,https://doi.org/10.1002/jmri.27987
9,Journal of Magnetic Resonance Imaging: Vol 55...,Original Research: Abdomen,Perfusion Analysis of Kidney Injury in Rats Wi...,09 September 2021,https://doi.org/10.1002/jmri.27917


## Save as csv

In [None]:
# Export this issue's table as CSV without the row index.
# Other issues were written to the same directory as
# jmri-vol-55-no-1.csv ... jmri-vol-55-no-4.csv.
csv_path = '/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/processed/jmri/jmri-vol-55-no-5.csv'
df.to_csv(path_or_buf=csv_path, index=False)

## Save as xlsx file
 - See [here](https://xlsxwriter.readthedocs.io/working_with_pandas.html) for instruction

In [None]:
# !pip install xlsxwriter
import xlsxwriter

# (Comment out after saving first sheet) Create an ExcelWriter object
# writer = pd.ExcelWriter('/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/processed/jmri-all-summary.xlsx', engine='xlsxwriter')

In [None]:
# Add this issue's table as one sheet of the summary workbook.
# The sheet name matches the issue currently loaded (Vol 55 No 5); earlier runs
# used names of the form 'jmri-vol-54-no-N'.
# NOTE(review): `writer` must already exist in the kernel. The pd.ExcelWriter
# creation line is commented out in this notebook, so on a fresh kernel this
# cell raises NameError until the writer is created.
df.to_excel(excel_writer=writer,
            sheet_name='jmri-vol-55-no-5',
            index=False)

In [None]:
# (Run at the end) Close the Pandas Excel writer, which writes the Excel file to disk.
# close() is used instead of save(): ExcelWriter.save() was deprecated and later
# removed from pandas, while close() works on both older and current versions.
writer.close()