## Mission to Mars
- Let's scrape Mars data using BeautifulSoup from multiple sources! 
- We will store the results as a Python dictionary in the scrape.py file and present the data as a web app using Flask (app.py file)

In [1]:
# import dependencies
from bs4 import BeautifulSoup
from splinter import Browser
import requests

## Scrape NASA Mars News

In [2]:
# url for Mars News
url = 'https://mars.nasa.gov/news/'

# retrieve response and create Beautiful Soup object
response = requests.get(url)
news_soup = BeautifulSoup(response.text, 'lxml')

In [5]:
# confirm formatted version of soup
print(news_soup.prettify())

<!DOCTYPE html>
<html lang="en" xml:lang="en" xmlns="http://www.w3.org/1999/xhtml">
 <head>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <!-- Always force latest IE rendering engine or request Chrome Frame -->
  <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
  <script type="text/javascript">
   window.NREUM||(NREUM={});NREUM.info={"beacon":"bam.nr-data.net","errorBeacon":"bam.nr-data.net","licenseKey":"5e33925808","applicationID":"59562082","transactionName":"JVcPR0MLWApSRU1eAQVVEhxSC1oSUlkWbBMHXwRAHhdcCUA=","queueTime":0,"applicationTime":228,"agent":""}
  </script>
  <script type="text/javascript">
   (window.NREUM||(NREUM={})).loader_config={xpid:"VQcPUlZTDxAFXVRUBQEPVA=="};window.NREUM||(NREUM={}),__nr_require=function(t,n,e){function r(e){if(!n[e]){var o=n[e]={exports:{}};t[e][0].call(o.exports,function(n){var o=t[e][1][n];return r(o||n)},o,o.exports)}return n[e].exports}if("function"==typeof __nr_require)return __nr_require;for(var o=0

In [56]:
# gather top news title
news_title = news_soup.find('div', class_='slide').find('div', class_='content_title').text
news_title = news_title.strip()

# confirm results
print(news_title)

NASA Garners 7 Webby Award Nominations


In [58]:
# gather top news description 
news_p = news_soup.find('div', class_='slide').find('div', class_='rollover_description_inner').text.rstrip()
news_p = news_p.strip()

# confirm results
print(news_p)

Nominees include four JPL projects: the solar system and climate websites, InSight social media, and a 360-degree Earth video. Public voting closes April 18, 2019.


## Scrape JPL Mars Space Featured Image

In [9]:
# check chromedriver in Mac
!which chromedriver

/usr/local/bin/chromedriver


In [10]:
# initialize chromedriver
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [11]:
# url for Mars Space Featured Image
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [12]:
# format version to soup 
html = browser.html
image_soup = BeautifulSoup(html, 'html.parser')

# confirm results
print(image_soup.prettify())

<html class="js flexbox canvas canvastext webgl no-touch geolocation postmessage websqldatabase indexeddb hashchange history draganddrop websockets rgba hsla multiplebgs backgroundsize borderimage borderradius boxshadow textshadow opacity cssanimations csscolumns cssgradients cssreflections csstransforms csstransforms3d csstransitions fontface generatedcontent video audio localstorage sessionstorage webworkers applicationcache svg inlinesvg smil svgclippaths -webkit-" style="">
 <!-- START HEADER: "DEFAULT" -->
 <head>
  <script async="" src="https://script.crazyegg.com/pages/scripts/0025/5267.js?432594" type="text/javascript">
  </script>
  <script src="//m.addthis.com/live/red_lojson/300lo.json?si=5cd320b87d60c201&amp;bkl=0&amp;bl=1&amp;pdt=1301&amp;sid=5cd320b87d60c201&amp;pub=&amp;rev=v8.4.4-wp&amp;ln=en&amp;pc=men&amp;cb=0&amp;ab=-&amp;dp=www.jpl.nasa.gov&amp;fp=spaceimages%2F%3Fsearch%3D%26category%3DMars&amp;fr=&amp;of=1&amp;pd=0&amp;irt=0&amp;vcl=0&amp;md=0&amp;ct=1&amp;tct=0&a

In [13]:
# gather feature image link
featured_img_link = image_soup.find('div', class_='carousel_items').find('a', class_="button fancybox").get('data-fancybox-href')
featured_img_url = 'https://www.jpl.nasa.gov' + featured_img_link 

# confirm results
print(featured_img_url)

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA17838_ip.jpg


## Scrape Mars Weather 

In [15]:
# url for Mars Weather 
url = 'https://twitter.com/marswxreport?lang=en'
response = requests.get(url)

# use BeautifulSoup to scrape web data
weather_soup = BeautifulSoup(response.text, 'lxml')

# confirm results
print(weather_soup.prettify())

<!DOCTYPE html>
<html data-scribe-reduced-action-queue="true" lang="en">
 <head>
  <meta charset="utf-8"/>
  <script nonce="rvAHAanjQnsh8PFh0H8TFw==">
   !function(){window.initErrorstack||(window.initErrorstack=[]),window.onerror=function(r,i,n,o,t){r.indexOf("Script error.")>-1||window.initErrorstack.push({errorMsg:r,url:i,lineNumber:n,column:o,errorObj:t})}}();
  </script>
  <script id="bouncer_terminate_iframe" nonce="rvAHAanjQnsh8PFh0H8TFw==">
   if (window.top != window) {
  window.top.postMessage({'bouncer': true, 'event': 'complete'}, '*');
}
  </script>
  <script id="swift_action_queue" nonce="rvAHAanjQnsh8PFh0H8TFw==">
   !function(){function e(e){if(e||(e=window.event),!e)return!1;if(e.timestamp=(new Date).getTime(),!e.target&&e.srcElement&&(e.target=e.srcElement),document.documentElement.getAttribute("data-scribe-reduced-action-queue"))for(var t=e.target;t&&t!=document.body;){if("A"==t.tagName)return;t=t.parentNode}return i("all",o(e)),a(e)?(document.addEventListener||(e=o(

In [50]:
# gather mars weather 
mars_weather = weather_soup.find('div', class_="js-tweet-text-container").text

# confirm results
print(mars_weather)


InSight sol 158 (2019-05-07) low -99.7ºC (-147.5ºF) high -21.8ºC (-7.2ºF)
winds from the SSE at 4.8 m/s (10.7 mph) gusting to 13.6 m/s (30.4 mph)
pressure at 7.50 hPapic.twitter.com/8SrPjAhpGZ



## Mars Facts 

In [60]:
import pandas as pd

In [61]:
# url for table of mars facts
url = 'https://space-facts.com/mars/'

In [62]:
# gather tables using Pandas
tables = pd.read_html(url)

# confirm tables
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [63]:
# convert table into Panda DataFrame
df = tables[0]
df.columns = ['Description', 'Value']
df.set_index('Description', inplace=True)

# check dataframe
df

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [32]:
# convert dataframe to html
html_table = df.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>

In [33]:
# remove breaks in html table
html_table.replace('\n', '')

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Value</th>    </tr>    <tr>      <th>Description</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

In [None]:
# save table
#df.to_html('mars_table.html')

## Mars Hemispheres

In [2]:
# url of all four mars hemispheres
valles_url = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced'
syrtis_url = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced'
cerberus_url = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced'
schiaparelli_url = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced'

In [3]:
valles_response = requests.get(valles_url)
valles_soup = BeautifulSoup(valles_response.text, 'lxml')

syrtis_response = requests.get(syrtis_url)
syrtis_soup = BeautifulSoup(syrtis_response.text, 'lxml')

cerberus_response = requests.get(cerberus_url)
cerberus_soup = BeautifulSoup(cerberus_response.text, 'lxml')

schiaparelli_response = requests.get(schiaparelli_url)
schiaparelli_soup = BeautifulSoup(schiaparelli_response.text, 'lxml')

In [44]:
# create variable to store image link of each hemisphere
valles_img_link = valles_soup.find('div', class_="downloads").find('a').get('href')
syrtis_img_link = syrtis_soup.find('div', class_="wide-image-wrapper").find('a').get('href')
cerberus_img_link = cerberus_soup.find('div', class_="wide-image-wrapper").find('a').get('href')
schiaparelli_img_link = schiaparelli_soup.find('div', class_="wide-image-wrapper").find('a').get('href')

# confirm links
print(valles_img_link)
print(syrtis_img_link)
print(cerberus_img_link)
print(schiaparelli_img_link)

http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg


In [46]:
# create dictionary to store results
hemisphere_image_urls = [
    {"title": "Valles Marineris Hemisphere", "img_url": valles_img_link},
    {"title": "Cerberus Hemisphere", "img_url": cerberus_img_link},
    {"title": "Schiaparelli Hemisphere", "img_url": schiaparelli_img_link},
    {"title": "Syrtis Major Hemisphere", "img_url": syrtis_img_link},
]

# check dictionary
for x in hemisphere_image_urls:
    for key, value in x.items():
        print(key)
        print(value)

title
Valles Marineris Hemisphere
img_url
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg
title
Cerberus Hemisphere
img_url
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg
title
Schiaparelli Hemisphere
img_url
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg
title
Syrtis Major Hemisphere
img_url
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg
