In [1]:
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
from bs4 import BeautifulSoup

In [2]:
# Confirm that a chromedriver binary can be found on PATH before Chrome is
# launched through splinter. Setup reference:
# https://splinter.readthedocs.io/en/latest/drivers/chrome.html
!which chromedriver

/usr/local/bin/chromedriver


In [3]:
# Start the Chrome session that the scraping cells drive. headless=False keeps
# the window visible while pages load. No driver path is passed here; the
# explicit-path option that was tried earlier is kept for reference:
##executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser(driver_name='chrome', headless=False)

# Step 1: Scraping

## NASA Mars News

In [4]:
# Address of the NASA Mars news listing; open it in the browser started above
# so the page source can be read from `browser.html`.
nasa_url = 'http://mars.nasa.gov/news/'
browser.visit(nasa_url)

In [5]:
# The article list may be filled in after the initial page load, so give it up
# to 10 seconds to appear before the page source is read.
browser.is_element_present_by_css('div.list_text', wait_time=10)

# HTML object
html = browser.html

# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')

# Retrieve all elements that contain a title and a paragraph
news_articles = soup.find_all('div', class_='list_text')

# Collect (title, paragraph) pairs, skipping entries that lack either piece
# instead of failing on `None.text`.
news_items = []
for news in news_articles:
    title_div = news.find('div', class_='content_title')
    teaser_div = news.find('div', class_='article_teaser_body')
    if title_div is None or teaser_div is None:
        continue
    news_items.append((title_div.text, teaser_div.text))

# Fail here with a clear message rather than later with a NameError (or with
# stale values left over from a previous run).
if not news_items:
    raise RuntimeError('No news articles were found on the NASA Mars news page')

for item_title, item_paragraph in news_items:
    print('---------------------')
    print(item_title)
    print(item_paragraph)

# Keep the first listed article as the headline item. Previously the loop
# variables were reused, so the *last* article on the page ended up in
# news_title / news_paragraph. (Assumes the listing is ordered newest-first.)
news_title, news_paragraph = news_items[0]

---------------------
NASA Engineers Checking InSight's Weather Sensors
An electronics issue is suspected to be preventing the sensors from sharing their data about Mars weather with the spacecraft.
---------------------
Follow NASA's Perseverance Rover in Real Time on Its Way to Mars
A crisply rendered web application can show you where the agency's Mars 2020 mission is right now as it makes its way to the Red Planet for a Feb. 18, 2021, landing.
---------------------
NASA Establishes Board to Initially Review Mars Sample Return Plans
The board will assist with analysis of current plans and goals for one of the most difficult missions humanity has ever undertaken.
---------------------
NASA's Ingenuity Mars Helicopter Recharges Its Batteries in Flight
Headed to the Red Planet with the Perseverance rover, the pioneering helicopter is powered up for the first time in interplanetary space as part of a systems check.
---------------------
Celebrate Mars Reconnaissance Orbiter's Views From

## JPL Mars Space Images - Featured Image

In [6]:
import time

# Reuse the browser that is already open: creating another Browser here would
# start a second Chrome process and leave the first one running.
# The query string uses `category=Mars` (the original had `category-Mars`,
# which is not a key=value pair).
spaceimage_url = 'http://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(spaceimage_url)

# Go to the full image by clicking the button
browser.click_link_by_partial_text('FULL IMAGE')
# Fixed pause so the 'more info' link is available before it is clicked
time.sleep(5)

# Go to the detail page by clicking 'more info'
browser.click_link_by_partial_text('more info')
# Wait (up to 10 s) for the detail page's main figure before reading the HTML,
# otherwise the source of the previous page may be parsed.
browser.is_element_present_by_css('figure.lede', wait_time=10)

# Parse HTML with Beautiful Soup
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# The full-size image is the link wrapped inside <figure class="lede">
lede_figure = soup.find('figure', class_='lede')
if lede_figure is None or lede_figure.a is None:
    raise RuntimeError('Featured image link not found on the JPL detail page')

# The href is site-relative, so prefix the JPL host to get an absolute URL
image_url = lede_figure.a['href']
featured_image_url = f'https://www.jpl.nasa.gov{image_url}'
print(featured_image_url)



https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA02570_hires.jpg


## Mars Facts

In [7]:
# Only the address is needed for the facts table: pandas downloads and parses
# this page itself via pd.read_html(facts_url), so no additional Chrome window
# is launched and no browser navigation is performed here.
facts_url = 'http://space-facts.com/mars/'

In [8]:
import pandas as pd

In [9]:
# Use pandas to fetch the facts page and parse its HTML tables; the result is
# indexed like a list and the first entry (index 0) is kept as the Mars
# planet-profile table.
table = pd.read_html(facts_url)
mars_facts = table[0]
print(mars_facts)

                      0                              1
0  Equatorial Diameter:                       6,792 km
1       Polar Diameter:                       6,752 km
2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
3                Moons:            2 (Phobos & Deimos)
4       Orbit Distance:       227,943,824 km (1.38 AU)
5         Orbit Period:           687 days (1.9 years)
6  Surface Temperature:                   -87 to -5 °C
7         First Record:              2nd millennium BC
8          Recorded By:           Egyptian astronomers


In [10]:
# Rename the columns: replace the default labels (0 and 1) with descriptive names.
# NOTE(review): this assigns exactly two labels, so the cell fails if it is
# re-run after 'Description' has been moved into the index further down.
mars_facts.columns = ['Description', 'Mars'] 
print(mars_facts)

            Description                           Mars
0  Equatorial Diameter:                       6,792 km
1       Polar Diameter:                       6,752 km
2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
3                Moons:            2 (Phobos & Deimos)
4       Orbit Distance:       227,943,824 km (1.38 AU)
5         Orbit Period:           687 days (1.9 years)
6  Surface Temperature:                   -87 to -5 °C
7         First Record:              2nd millennium BC
8          Recorded By:           Egyptian astronomers


In [11]:
# Use the 'Description' column as the row index.
# The frame is re-bound instead of modified with inplace=True, and the guard
# skips the step when 'Description' is no longer a column, so re-running this
# cell does not raise KeyError.
if 'Description' in mars_facts.columns:
    mars_facts = mars_facts.set_index('Description')
print(mars_facts)

                                               Mars
Description                                        
Equatorial Diameter:                       6,792 km
Polar Diameter:                            6,752 km
Mass:                 6.39 × 10^23 kg (0.11 Earths)
Moons:                          2 (Phobos & Deimos)
Orbit Distance:            227,943,824 km (1.38 AU)
Orbit Period:                  687 days (1.9 years)
Surface Temperature:                   -87 to -5 °C
First Record:                     2nd millennium BC
Recorded By:                   Egyptian astronomers


In [14]:
# Use pandas to convert the data to an HTML table string.
# to_html has to be *called*: without the parentheses html_table held the bound
# method object rather than the markup, which is also why string operations
# such as .replace() could not be applied to it.
html_table = mars_facts.to_html()

## Mars Hemispheres

In [15]:
# Reuse the browser that is already open instead of launching yet another
# Chrome process (the earlier ones were never closed).
hemisphere_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemisphere_url)

In [16]:
from urllib.parse import urljoin

# Snapshot of the search-results page that lists the Mars hemispheres
hemisphere_html = browser.html
# Parse HTML with Beautiful Soup
hemisphere_soup = BeautifulSoup(hemisphere_html, 'html.parser')

# List of {"title": ..., "img_url": ...} dictionaries, one per hemisphere
hemisphere_image_urls = []

# Retrieve all result entries (one <div class="item"> per hemisphere)
hemispheres = hemisphere_soup.find_all('div', class_='item')

# Visit each hemisphere's detail page and collect its image link
for hemisphere in hemispheres:

    # Title comes from the <h3>; drop the word 'Enhanced' and strip the
    # whitespace it leaves behind (titles used to end with a trailing space).
    title = hemisphere.find("h3").text.replace("Enhanced", "").strip()

    # Build the detail-page URL. urljoin copes with hrefs that start with '/'
    # (plain concatenation produced a double slash) as well as absolute hrefs.
    end_link_name = hemisphere.find("a")["href"]
    image_link = urljoin("https://astrogeology.usgs.gov/", end_link_name)
    browser.visit(image_link)

    # The first link under <div class="downloads"> is the 'Sample' full jpg.
    # Loop-local names are used so the notebook-level html / soup / image_url
    # from earlier cells are not overwritten.
    detail_soup = BeautifulSoup(browser.html, "html.parser")
    downloads = detail_soup.find("div", class_="downloads")
    hemisphere_img_url = downloads.find("a")["href"]
    hemisphere_image_urls.append({"title": title, "img_url": hemisphere_img_url})

# Print image titles and urls
print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere ', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere ', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere ', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere ', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]


In [17]:
# Bundle every scraped result into a single dictionary: headline title and
# teaser, featured image URL, facts table, and the hemisphere image list.
mars_data = dict(
    News_Title=news_title,
    News_Paragraph=news_paragraph,
    Featured_Image=featured_image_url,
    Mars_Facts=html_table,
    Mars_Hemisphere_Image=hemisphere_image_urls,
)