In [1]:
# Import dependencies
from bs4 import BeautifulSoup
import requests
import pandas as pd
from splinter import Browser 
import time

In [2]:
# NASA.gov Mars Site
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'

# Retrieve page with the requests module
response = requests.get(url)
# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')

In [3]:
# Retrieve the parent divs for all articles
results = soup.find_all('div', class_='slide')
url_heading = 'https://mars.nasa.gov'

nasa_news_title = []
nasa_news_p = []
counter = 0

for result in results:
    # scrape the article header 
    headline = result.find('div', class_='content_title').text
    nasa_news_title.append(headline)
    link = url_heading + str(result.find('a')['href'])
    description = result.find('div', class_='rollover_description_inner').text
    nasa_news_p.append(description)
    counter += 1
    print(f"---- Article #{counter} ----")
    print(headline)
    print(description)
    print(link)
    print("-----------------------------")

---- Article #1 ----


NASA Readies Perseverance Mars Rover's Earthly Twin 



Did you know NASA's next Mars rover has a nearly identical sibling on Earth for testing? Even better, it's about to roll for the first time through a replica Martian landscape.

https://mars.nasa.gov/news/8749/nasa-readies-perseverance-mars-rovers-earthly-twin/
-----------------------------
---- Article #2 ----


NASA to Broadcast Mars 2020 Perseverance Launch, Prelaunch Activities



Starting July 27, news activities will cover everything from mission engineering and science to returning samples from Mars to, of course, the launch itself.

https://mars.nasa.gov/news/8716/nasa-to-broadcast-mars-2020-perseverance-launch-prelaunch-activities/
-----------------------------
---- Article #3 ----


The Launch Is Approaching for NASA's Next Mars Rover, Perseverance



The Red Planet's surface has been visited by eight NASA spacecraft. The ninth will be the first that includes a roundtrip ticket in its flight plan. 

In [4]:
# print news titles
nasa_news_title

["\n\nNASA Readies Perseverance Mars Rover's Earthly Twin \n\n",
 '\n\nNASA to Broadcast Mars 2020 Perseverance Launch, Prelaunch Activities\n\n',
 "\n\nThe Launch Is Approaching for NASA's Next Mars Rover, Perseverance\n\n",
 '\n\nNASA to Hold Mars 2020 Perseverance Rover Launch Briefing\n\n',
 "\n\nAlabama High School Student Names NASA's Mars Helicopter\n\n",
 "\n\nMars Helicopter Attached to NASA's Perseverance Rover\n\n"]

In [5]:
# print news descriptions
nasa_news_p

["\nDid you know NASA's next Mars rover has a nearly identical sibling on Earth for testing? Even better, it's about to roll for the first time through a replica Martian landscape.\n",
 '\nStarting July 27, news activities will cover everything from mission engineering and science to returning samples from Mars to, of course, the launch itself.\n',
 "\nThe Red Planet's surface has been visited by eight NASA spacecraft. The ninth will be the first that includes a roundtrip ticket in its flight plan. \n",
 "\nLearn more about the agency's next Red Planet mission during a live event on June 17.\n",
 "\nVaneeza Rupani's essay was chosen as the name for the small spacecraft, which will mark NASA's first attempt at powered flight on another planet.\n",
 "\nThe team also fueled the rover's sky crane to get ready for this summer's history-making launch.\n"]

In [6]:
# JPL Mars Space Images Site
# URL of page to be scraped
base_url = 'https://www.jpl.nasa.gov'
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

In [7]:
# Setup Chromedriver
executable_path = {'executable_path': '../chromedriver/chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [8]:
browser.visit(url)

In [9]:
# click the full image button
browser.find_by_id('full_image').click()

In [10]:
# Allow Splinter to catch up to updated html after popup appears
time.sleep(5)

In [11]:
# HTML object
html = browser.html
# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')
# find more info button
more_info_button = soup.find('div', class_='buttons').find_all('a')[1].text
more_info_button

'more info     '

In [12]:
# click the more info button to go to the largest size image
browser.find_by_text(more_info_button).click()

In [13]:
# HTML object
html = browser.html
# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')
main_image = soup.find_all('img', class_='main_image')
main_image

[<img alt="A bubbling cauldron of star birth is highlighted in this image from NASA's Spitzer Space Telescope. Massive stars have blown bubbles, or cavities, in the dust and gas -- a violent process that triggers both the death and birth of stars." class="main_image" src="/spaceimages/images/largesize/PIA15253_hires.jpg" title="A bubbling cauldron of star birth is highlighted in this image from NASA's Spitzer Space Telescope. Massive stars have blown bubbles, or cavities, in the dust and gas -- a violent process that triggers both the death and birth of stars."/>]

In [14]:
# store the main image url
featured_img_url = str(base_url + main_image[0]['src'])
featured_img_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA15253_hires.jpg'

In [15]:
# Mars Facts Site
# URL of site to be scraped
url = 'https://space-facts.com/mars/'

In [16]:
# Use pandas to read for tables
tables = pd.read_html(url)
planet_comp_df = tables[1]
planet_comp_df

Unnamed: 0,Mars - Earth Comparison,Mars,Earth
0,Diameter:,"6,779 km","12,742 km"
1,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
2,Moons:,2,1
3,Distance from Sun:,"227,943,824 km","149,598,262 km"
4,Length of Year:,687 Earth days,365.24 days
5,Temperature:,-87 to -5 °C,-88 to 58°C


In [17]:
# Remove ':' from index column
clean_index = []

for row in planet_comp_df.iloc[:,0]:
    new_row_name = row.replace(':', '')
    clean_index.append(new_row_name)

clean_index

['Diameter',
 'Mass',
 'Moons',
 'Distance from Sun',
 'Length of Year',
 'Temperature']

In [18]:
# Add clean index to df
planet_comp_df['Key Facts'] = clean_index
planet_comp_df

Unnamed: 0,Mars - Earth Comparison,Mars,Earth,Key Facts
0,Diameter:,"6,779 km","12,742 km",Diameter
1,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg,Mass
2,Moons:,2,1,Moons
3,Distance from Sun:,"227,943,824 km","149,598,262 km",Distance from Sun
4,Length of Year:,687 Earth days,365.24 days,Length of Year
5,Temperature:,-87 to -5 °C,-88 to 58°C,Temperature


In [19]:
# Reset index to clean index and drop old index
planet_comp_df = planet_comp_df.set_index('Key Facts')
planet_comp_df = planet_comp_df.drop(['Mars - Earth Comparison'], axis=1)
planet_comp_df

Unnamed: 0_level_0,Mars,Earth
Key Facts,Unnamed: 1_level_1,Unnamed: 2_level_1
Diameter,"6,779 km","12,742 km"
Mass,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons,2,1
Distance from Sun,"227,943,824 km","149,598,262 km"
Length of Year,687 Earth days,365.24 days
Temperature,-87 to -5 °C,-88 to 58°C


In [20]:
# Mars Hemispheres Site
# URL of site to be scraped
base_url = 'https://astrogeology.usgs.gov/'
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

In [21]:
# Setup Chromedriver
executable_path = {'executable_path': '../chromedriver/chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [22]:
browser.visit(url)

In [23]:
# HTML object
html = browser.html
# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')
# find html for looping through hemisphere links 
hemi_pages = soup.find('div', class_='collapsible results').find_all('div', class_='description')
hemi_pages

[<div class="description"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><h3>Cerberus Hemisphere Enhanced</h3></a><span class="subtitle" style="float:left">image/tiff 21 MB</span><span class="pubDate" style="float:right"></span><br/><p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p></div>,
 <div class="description"><a class="itemLink product-item" href="/search/map/Mars/Viking/schiaparelli_enhanced"><h3>Schiaparelli Hemisphere Enhanced</h3></a><span class="subtitle" style="float:left">image/tiff 35 MB</span><span class="pubDate" style="float:right"></span><br/><p>Mosaic of the Schiaparelli hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. The images were acquired in 1980 during early northern…</p></div>,
 <div class="description"><a 

In [24]:
# Setup dictionary for Hemisphere titles and image links
hemisphere_image_urls = []
counter = 0

# Write loop to extract all hemisphere names and image links
for hemi_page in hemi_pages:
    # create dictionary level for each planet
    hemi_dictionary = {}
    hemi_dictionary['title'] = ''
    hemi_dictionary['img_url'] = ''
    
    # first add title to dictionary
    title = hemi_page.find('h3').text
    title = title.replace(' Enhanced', '')
    hemi_dictionary['title'] = title
    hemisphere_image_urls.append(hemi_dictionary)
    
    # next add the image url
    hemi_page_url = base_url + hemi_pages[counter].find('a')['href']
    # visit the hemisphere page
    browser.visit(hemi_page_url)
    # Allow Splinter to catch up to updated html
    time.sleep(5)
    
    # retrieve new html
    # HTML object
    html = browser.html
    # Parse HTML with Beautiful Soup
    soup = BeautifulSoup(html, 'html.parser')
    # Find the text on the jpg download link
    img_url = str(soup.find('div', class_='downloads').find('li').find('a')['href'])

    # increase the counter and add the dictionary to the hemisphere list
    counter += 1
    hemi_dictionary['img_url'] = img_url
    
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]