In [1]:
#Dependencies
from io import StringIO

import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
#Setup splinter
def init_browser():
    """Start a splinter-controlled Chrome window and return it.

    The chromedriver path comes from ``ChromeDriverManager().install()`` and
    is handed to splinter as ``executable_path``. The browser is started with
    ``headless=False``.

    Returns:
        The splinter ``Browser`` instance.
    """
    driver_path = ChromeDriverManager().install()
    return Browser('chrome', executable_path=driver_path, headless=False)

In [3]:
#Create a dictionary to store all results 
def scrape():
    """Run every Mars scraper against one shared browser and collect the results.

    Returns:
        dict: the return values of the four scrapers, keyed as
            'p_data'      -> scrape_1 (latest news title and paragraph)
            'f_image'     -> scrape_2 (featured image URL)
            'table_data'  -> scrape_3 (facts table as an HTML string)
            'hemispheres' -> scrape_4 (list of title / image_url dicts)

    Raises:
        Any exception raised by a scraper propagates unchanged; the browser
        is still closed first.
    """
    browser = init_browser()
    try:
        data = {
            'p_data': scrape_1(browser),
            'f_image': scrape_2(browser),
            'table_data': scrape_3(browser),
            'hemispheres': scrape_4(browser)
        }
    finally:
        # Always shut the browser down, even when a scraper fails; otherwise
        # every call leaves a Chrome window and driver process running.
        browser.quit()
    return data

## Get the most recent Mars story headline and summary

In [4]:
#Use splinter to go to the desired url
def scrape_1(browser):
    """Scrape the first headline and teaser from NASA's Mars news listing.

    Args:
        browser: splinter browser used to load the page.

    Returns:
        dict: ``{'title': <headline text>, 'paragraph': <teaser text>}``.
        A value is ``None`` when the corresponding markup is not found, so
        a layout change or slow page degrades to missing data instead of an
        ``AttributeError`` on ``None``.
    """
    url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
    item_selector = 'ul.item_list li.slide'
    browser.visit(url)
    # Give the page a chance to render the first news item before taking the
    # HTML snapshot; returns immediately once the element is present.
    browser.is_element_present_by_css(item_selector, wait_time=10)
    # Collect the latest news title and paragraph text from NASA's website
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    element = soup.select_one(item_selector)
    if element is None:
        return {
            'title': None,
            'paragraph': None
        }
    title_pod = element.find('div', class_='content_title')
    teaser_pod = element.find('div', class_='article_teaser_body')
    news_title = title_pod.get_text() if title_pod is not None else None
    news_p = teaser_pod.get_text() if teaser_pod is not None else None
    return {
        'title': news_title,
        'paragraph': news_p
    }

## Get the most recent featured Mars Image

In [5]:
#url for new featured image
def scrape_2(browser):
    """Return the absolute URL of the featured Mars image.

    The image link is read from the ``href`` of the anchor carrying the
    ``showing`` and ``fancybox-thumbs`` classes. The previous implementation
    called ``soup.select('a', class_=...)``; ``select()`` takes a CSS selector
    string, so that call matched every ``<a>`` and the code relied on the
    third anchor on the page being the right one. It also clicked the
    "Full Image" button *after* the HTML snapshot had been parsed, so the
    click never influenced the result and has been dropped.

    Args:
        browser: splinter browser used to load the page.

    Returns:
        str | None: base URL + the anchor's relative ``href``, or ``None``
        when no matching anchor (or no ``href``) is present.
    """
    url = "https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/"
    browser.visit(url+'index.html')
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    # Target the full-image anchor by class rather than by page position.
    image_link = soup.select_one('a.showing.fancybox-thumbs')
    relative_url = image_link.get('href') if image_link is not None else None
    if not relative_url:
        return None
    final_url = url + relative_url
    return final_url

## Mars Facts

In [6]:
def scrape_3(browser):
    """Scrape the first table on the Mars facts page and return it as HTML.

    Args:
        browser: splinter browser used to load the page.

    Returns:
        str: the first table found in the page, with columns 0 and 1 renamed
        to 'Description' and 'Data', rendered by ``DataFrame.to_html`` without
        the index column.

    Raises:
        ValueError: propagated from ``pd.read_html`` when the page contains
        no tables.
    """
    url = "https://space-facts.com/mars/"
    browser.visit(url)
    # Passing a literal HTML string to read_html is deprecated (pandas 2.1+);
    # wrapping it in StringIO works on old and new pandas alike.
    mars_facts = pd.read_html(StringIO(browser.html))[0]
    mars_facts = mars_facts.rename(columns={0: 'Description', 1: 'Data'})
    mars_table = mars_facts.to_html(index=False)
    return mars_table

## Mars Hemispheres

In [7]:
def scrape_4(browser):
    """Collect a title and image link for each Mars hemisphere search result.

    Loads the USGS astrogeology search page, then for every ``div.item`` in
    the results visits the page behind the item's first link and reads the
    ``href`` of the first anchor inside that page's ``div.downloads``.

    Args:
        browser: splinter browser used to load the pages.

    Returns:
        list[dict]: one ``{'title': ..., 'image_url': ...}`` dict per result
        item, in page order.
    """
    browser.visit('https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars')
    listing = BeautifulSoup(browser.html, 'html.parser')
    hemispheres = []
    for card in listing.find_all('div', class_='item'):
        name = card.find('h3').text
        detail_path = card.find('a').get('href')
        # Each result links to a detail page that holds the download links.
        browser.visit(f"https://astrogeology.usgs.gov{detail_path}")
        detail_page = BeautifulSoup(browser.html, 'html.parser')
        downloads = detail_page.find('div', class_='downloads')
        hemispheres.append({
            'title': name,
            'image_url': downloads.find('a').get('href')
        })
    return hemispheres

In [8]:
# Run the full scrape; as the cell's last expression, the returned dict is
# displayed as the cell output below.
scrape()

[WDM] - Current google-chrome version is 89.0.4389
[WDM] - Get LATEST driver version for 89.0.4389






[WDM] - Driver [/Users/williamcutrone/.wdm/drivers/chromedriver/mac64/89.0.4389.23/chromedriver] found in cache


{'p_data': {'title': "New Study Challenges Long-Held Theory of Fate of Mars' Water",
  'paragraph': 'The new science results indicate that a large quantity of the Red Planet’s water is trapped in its crust rather than having escaped into space.'},
 'f_image': 'https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/image/featured/mars1.jpg',
 'table_data': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th>Description</th>\n      <th>Data</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <td>Orbit Period:</td>\n   