# Import Dependencies

In [6]:
from bs4 import BeautifulSoup
from splinter import Browser
import pandas as pd
import time

# Init Splinter Browser

In [3]:
# Location of the ChromeDriver binary handed to Splinter.
# On macOS use instead:
#   executable_path = {'executable_path': '/usr/local/bin/chromedriver'}

# Windows: chromedriver.exe is given as a bare relative filename.
executable_path = dict(executable_path='chromedriver.exe')

# Launch a visible (non-headless) Chrome window driven by Splinter.
browser = Browser('chrome', headless=False, **executable_path)

# Scrape - NASA Mars News

In [7]:
# URL of the NASA Mars News listing to be scraped
url1 = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'

# Open webpage
browser.visit(url1)

# Wait (up to 10 seconds) until the first news entry is actually present in
# the page instead of sleeping for a fixed interval: this returns as soon as
# the element shows up and fails loudly if it never does.
if not browser.is_element_present_by_css('div.list_text', wait_time=10):
    raise RuntimeError('NASA Mars News: no "div.list_text" entry appeared on the page')

# Retrieve HTML webpage source
html1 = browser.html

# Parse HTML webpage source using BeautifulSoup
soup1 = BeautifulSoup(html1, 'html.parser')

# Scrape the latest news title and paragraph (the first entry in the list)
result1 = soup1.find('div', class_='list_text')
if result1 is None:
    raise RuntimeError('NASA Mars News: "div.list_text" missing from the parsed HTML')

title_div = result1.find('div', class_='content_title')
teaser_div = result1.find('div', class_='article_teaser_body')
if title_div is None or teaser_div is None:
    # Guard against a layout change rather than failing with an opaque
    # AttributeError on None.
    raise RuntimeError('NASA Mars News: title or teaser element not found in the first entry')

news_title = title_div.text
news_p = teaser_div.text
print(news_title)
print(news_p)

MOXIE Could Help Future Rockets Launch Off Mars
NASA's Perseverance rover carries a device to convert Martian air into oxygen that, if produced on a larger scale, could be used not just for breathing, but also for fuel.


# Scrape - JPL Mars Space Images - Featured Image

In [12]:
# JPL Space Images search page, filtered to the Mars category
url2 = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

# Load the page in the browser and grab the rendered markup
browser.visit(url2)
html2 = browser.html

# Build a BeautifulSoup tree from the markup
soup2 = BeautifulSoup(html2, 'html.parser')

# The featured image is carried in the inline style of the first carousel
# item, in the form: ... url('<relative path>'); ...
divs = soup2.find('div', class_='carousel_items')
article = divs.find('article', class_='carousel_item')
inline_style = article['style']
after_url_open = inline_style.split("url('")[1]
featured_img = after_url_open.split("');")[0]

# Prefix the relative path with the site origin to get an absolute URL
featured_image_url = 'https://www.jpl.nasa.gov' + featured_img
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA17009-1920x1200.jpg


# Scrape - Mars Facts

In [13]:
# Page holding the Mars facts table
url3 = 'https://space-facts.com/mars/'

# Let pandas pull every <table> on the page into a list of DataFrames
dfs = pd.read_html(url3)

# The first table is the Mars fact sheet; give its two positional columns
# readable headers
column_labels = {0: 'Description', 1: 'Mars'}
mars_fact_df = dfs[0].rename(columns=column_labels)

# Render the DataFrame as an HTML table string with the given CSS classes
mars_fact_html_string = mars_fact_df.to_html(
    index=False,
    classes=['table', 'table-striped'],
    border=0,
    justify='left',
)
print(mars_fact_html_string)

<table border="0" class="dataframe table table-striped">
  <thead>
    <tr style="text-align: left;">
      <th>Description</th>
      <th>Mars</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>Equatorial Diameter:</td>
      <td>6,792 km</td>
    </tr>
    <tr>
      <td>Polar Diameter:</td>
      <td>6,752 km</td>
    </tr>
    <tr>
      <td>Mass:</td>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <td>Moons:</td>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <td>Orbit Distance:</td>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <td>Orbit Period:</td>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <td>Surface Temperature:</td>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <td>First Record:</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <td>Recorded By:</td>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


# Scrape - Mars Hemispheres

In [14]:
# USGS Astrogeology search results for the enhanced Mars hemisphere images
url4 = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# Load the results page and capture its markup
browser.visit(url4)
html3 = browser.html

# Build a BeautifulSoup tree from the markup
soup3 = BeautifulSoup(html3, 'html.parser')

# Every <h3> on the page is collected; its text is used as the hemisphere
# title (and later as the link text to click)
h3s = soup3.find_all('h3')
hemisphere_links = [heading.get_text() for heading in h3s]
hemisphere_links

['Cerberus Hemisphere Enhanced',
 'Schiaparelli Hemisphere Enhanced',
 'Syrtis Major Hemisphere Enhanced',
 'Valles Marineris Hemisphere Enhanced']

In [15]:
# List of {'title': ..., 'img_url': ...} dictionaries, one per hemisphere
hemisphere_image_urls = []

# Loop through the Mars Hemisphere titles collected from the results page
for hemisphere_link in hemisphere_links:

    # Find the link whose text contains the title and follow it
    browser.links.find_by_partial_text(hemisphere_link).click()

    # Retrieve and parse the HTML of the hemisphere detail page
    html4 = browser.html
    soup4 = BeautifulSoup(html4, 'html.parser')

    # The full-resolution image is linked from the anchor labelled "Sample"
    original_imgs = soup4.find_all('a', href=True, string='Sample')

    # The hemisphere name is in the <h2 class="title"> heading
    title_tag = soup4.find('h2', class_='title')

    # Fail loudly when the page does not have the expected structure.
    # Otherwise a missing "Sample" link would silently reuse the URL left
    # over from the previous hemisphere (or raise NameError on the first one).
    if not original_imgs or title_tag is None:
        raise RuntimeError(
            f'Could not find the "Sample" link or title for hemisphere: {hemisphere_link!r}'
        )

    # When several anchors match, keep the last one (same choice the
    # overwrite-in-a-loop version made)
    original_img_url = original_imgs[-1]['href']
    title = title_tag.text

    # Add the Mars Hemisphere dictionary to the list
    hemisphere_image_urls.append({'title': title, 'img_url': original_img_url})

    # Reload the search results page so the next title link can be clicked
    browser.visit(url4)

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]