In [42]:
# Import dependencies
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser

### Scraping NASA Site for news

In [2]:
# Scraped url
url = 'https://mars.nasa.gov/news/'

In [3]:
# Retrieve data from request
response = requests.get(url)

In [4]:
# Create BeautifulSoup object
soup = bs(response.text,'html.parser')

In [5]:
# Print retrieved page
soup

<!DOCTYPE html>

<html lang="en" xml:lang="en" xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<!-- Always force latest IE rendering engine or request Chrome Frame -->
<meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
<script type="text/javascript">window.NREUM||(NREUM={});NREUM.info={"beacon":"bam.nr-data.net","errorBeacon":"bam.nr-data.net","licenseKey":"5e33925808","applicationID":"59562082","transactionName":"JVcPR0MLWApSRU1eAQVVEhxSC1oSUlkWbBMHXwRAHhdcCUA=","queueTime":0,"applicationTime":166,"agent":""}</script>
<script type="text/javascript">(window.NREUM||(NREUM={})).loader_config={xpid:"VQcPUlZTDxAFXVRUBQEPVA=="};window.NREUM||(NREUM={}),__nr_require=function(t,n,e){function r(e){if(!n[e]){var o=n[e]={exports:{}};t[e][0].call(o.exports,function(n){var o=t[e][1][n];return r(o||n)},o,o.exports)}return n[e].exports}if("function"==typeof __nr_require)return __nr_require;for(var o=0;o<e.length;o++)r(e[o

In [6]:
# Return article title
news_title = soup.title.get_text()
news_title

'News  – NASA’s Mars Exploration Program '

In [7]:
# Return article body
news_bod = soup.p.get_text()
news_bod

'Managed by the Mars Exploration Program and the Jet Propulsion Laboratory for NASA’s Science Mission Directorate'

### Scraping NASA Site for Mars image

In [22]:
#  Chromedriver setup
executable_path = {'executable_path': 'C:/ChromeDriver/chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [23]:
# Visit image url
image_url = ('https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars')
browser.visit(image_url)

In [24]:
# Save featured image url
featured_image_url = ('https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16729_hires.jpg')

### Scraping MarsWxReport Twitter page

In [11]:
# Visit Twitter
twt_url = ('https://twitter.com/marswxreport?lang=en')
browser.visit(twt_url)

In [12]:
# Retrieve data from request
response_twt = requests.get(twt_url)

In [13]:
# Create BeautifulSoup object
soup_twt = bs(response_twt.text,'html.parser')

In [14]:
# Print BeautifulSoup object
soup_twt

<!DOCTYPE html>

<html data-scribe-reduced-action-queue="true" lang="en">
<head>
<meta charset="utf-8"/>
<script nonce="0AlVE6aOMfMgl23qvjWyUA==">
        !function(){window.initErrorstack||(window.initErrorstack=[]),window.onerror=function(r,i,n,o,t){r.indexOf("Script error.")>-1||window.initErrorstack.push({errorMsg:r,url:i,lineNumber:n,column:o,errorObj:t})}}();
      </script>
<script id="bouncer_terminate_iframe" nonce="0AlVE6aOMfMgl23qvjWyUA==">
    if (window.top != window) {
  window.top.postMessage({'bouncer': true, 'event': 'complete'}, '*');
}
  </script>
<script id="swift_action_queue" nonce="0AlVE6aOMfMgl23qvjWyUA==">
    !function(){function e(e){if(e||(e=window.event),!e)return!1;if(e.timestamp=(new Date).getTime(),!e.target&&e.srcElement&&(e.target=e.srcElement),document.documentElement.getAttribute("data-scribe-reduced-action-queue"))for(var t=e.target;t&&t!=document.body;){if("A"==t.tagName)return;t=t.parentNode}return i("all",o(e)),a(e)?(document.addEventListener||(e

In [15]:
# Retrieve latest tweet, class found by manual searching in BS object
latest_tweet = soup_twt.find('p', class_ = 'TweetTextSize TweetTextSize--normal js-tweet-text tweet-text')

In [16]:
# Pull text from latest tweet, print
mars_weather = latest_tweet.get_text()
mars_weather

'InSight sol 216 (2019-07-06) low -102.5ºC (-152.5ºF) high -24.9ºC (-12.8ºF)\nwinds from the SSE at 4.6 m/s (10.2 mph) gusting to 15.8 m/s (35.4 mph)\npressure at 7.60 hPapic.twitter.com/fwGott1wRL'

### Scraping Space Facts site for Mars facts with Pandas

In [46]:
# Scrape facts site with pandas
facts_url = ('https://space-facts.com/mars/')
facts_table = pd.read_html(facts_url)
facts_table

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [64]:
# Set up facts dataframe
facts_df = facts_table[0]
facts_df.columns = ['Attribute', 'Record']
facts_df.set_index('Attribute', inplace=True)
facts_df

Unnamed: 0_level_0,Record
Attribute,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [65]:
# Push pandas df to html table
html_facts = facts_df.to_html()
facts_df.to_html('facts_table.html')
html_facts

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Record</th>\n    </tr>\n    <tr>\n      <th>Attribute</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\

### Scraping Mars Hemispheres Site

In [100]:
# Visit cerberus page
cerberus_url = ('https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced')
browser.visit(cerberus_url)

# Retrieve data from request
response_cerberus = requests.get(cerberus_url)

# Create BeautifulSoup object
soup_cerberus = bs(response_cerberus.text,'html.parser')

# Retrieve Title
cerberus_title = soup_cerberus.find("h2", class_ = 'title').get_text()
cerberus_title

'Cerberus Hemisphere Enhanced'

In [101]:
# Visit schiaparelli page
schia_url = ('https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced')
browser.visit(schia_url)

# Retrieve data from request
response_schia = requests.get(schia_url)

# Create BeautifulSoup object
soup_schia = bs(response_schia.text,'html.parser')

# Retrieve Title
schia_title = soup_schia.find("h2", class_ = 'title').get_text()
schia_title

'Schiaparelli Hemisphere Enhanced'

In [102]:
# Visit syrtis page
syrtis_url = ('https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced')
browser.visit(syrtis_url)

# Retrieve data from request
response_syrtis = requests.get(syrtis_url)

# Create BeautifulSoup object
soup_syrtis = bs(response_syrtis.text,'html.parser')

# Retrieve Title
syrtis_title = soup_syrtis.find("h2", class_ = 'title').get_text()
syrtis_title

'Syrtis Major Hemisphere Enhanced'

In [103]:
# Visit valles page
valles_url = ('https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced')
browser.visit(valles_url)

# Retrieve data from request
response_valles = requests.get(valles_url)

# Create BeautifulSoup object
soup_valles = bs(response_valles.text,'html.parser')

# Retrieve Title
valles_title = soup_valles.find("h2", class_ = 'title').get_text()
valles_title

'Valles Marineris Hemisphere Enhanced'

In [105]:
# Define hemisphere image urls 
cerberus_img = ('https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg')
schia_img = ('https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg')
syrtis_img = ('https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg')
valles_img = ('https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg')

In [111]:
# Create list of dictionaries to store titles and image urls
hemisphere_image_urls = [
    {"Title" : cerberus_title, "Snapshot" : cerberus_img},
    {"Title" : schia_title, "Snapshot" : schia_img},
    {"Title" : syrtis_title, "Snapshot" : syrtis_img},
    {"Title" : valles_title, "Snapshot" : valles_img}
]

In [115]:
# Print list of hemisphere image dictionaries
hemisphere_image_urls

[{'Title': 'Cerberus Hemisphere Enhanced',
  'Snapshot': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'Title': 'Schiaparelli Hemisphere Enhanced',
  'Snapshot': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'Title': 'Syrtis Major Hemisphere Enhanced',
  'Snapshot': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'Title': 'Valles Marineris Hemisphere Enhanced',
  'Snapshot': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]