In [43]:
# Import scraping (BeautifulSoup, requests), data (pandas), and database (pymongo) dependencies
from bs4 import BeautifulSoup
import requests
import pandas as pd 
import pymongo

In [3]:
# Import Splinter and set the chromedriver path
from splinter import Browser
# NOTE(review): absolute, machine-specific chromedriver location; adjust it to the local install.
executable_path = {"executable_path": "/usr/local/bin/chromedriver"}
# One Chrome session is created here and reused by the later cells through `browser`.
# headless=False is passed explicitly, i.e. the browser is not run in headless mode.
browser = Browser("chrome", **executable_path, headless=False)

## Get NASA Mars News

In [7]:
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
# Load the page in the Splinter-driven browser, then parse the HTML the browser
# reports so the following cells can query it through `soup`.
browser.visit(url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [8]:
# Retrieve the title for the first article
# soup.find returns only the first <div class="content_title"> in the parsed page.
# NOTE(review): find returns None when nothing matches; `.text` below would then
# raise AttributeError.
results = soup.find('div', class_="content_title")

# Surrounding whitespace is stripped from the element's text.
news_title = results.text.strip()
news_title

'Update on Opportunity Rover after Martian Dust Storm'

In [9]:
# Retrieve the lede for the first article
# soup.find returns only the first <div class="article_teaser_body"> in the parsed page.
# NOTE(review): find returns None when nothing matches; `.text` below would then
# raise AttributeError.
results = soup.find('div', class_="article_teaser_body")

# Surrounding whitespace is stripped from the element's text.
news_p = results.text.strip()

news_p

"One month since increasing their commanding frequency, engineers have yet to hear from NASA's Opportunity rover."

In [8]:
# Alternative means for retrieving the lede based on the xpath generated by inspector
# The path is positional (li[1], div[3]), so it depends on the page's current layout.
# NOTE(review): this queries whatever page `browser` currently has loaded, not `soup`,
# so it assumes the news page is still the active page.
xpath = '//*[@id="page"]/div[2]/section/div/ul/li[1]/div/div/div[3]'
results = browser.find_by_xpath(xpath)
results.text

"One month since increasing their commanding frequency, engineers have yet to hear from NASA's Opportunity rover."

## Get JPL's Featured Mars Image

In [10]:
# Open the JPL space-images page (the query string requests the Mars category).
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)
html = browser.html
# NOTE(review): `soup` is rebuilt here but not read in this cell; the lookup below
# goes through the live browser instead.
soup = BeautifulSoup(html, 'html.parser')
# Positional XPath to the anchor inside the first <li> of the list in section[3].
xpath = '//*[@id="page"]/section[3]/div/ul/li[1]/a'
results = browser.find_by_xpath(xpath)
# The data-fancybox-href attribute is appended to the site origin to form the full URL.
featured_image_url = 'https://www.jpl.nasa.gov' + results["data-fancybox-href"]
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA22719_hires.jpg'

## Get Latest Mars Weather Report

In [38]:
# Load the Mars weather account's timeline and parse the HTML the browser reports.
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Take the first tweet (in document order) whose text starts with "Sol ";
# other tweets on the timeline are skipped.
# `mars_weather` defaults to None so that, when no tweet qualifies, it neither
# keeps the text of the last unrelated tweet nor stays undefined/stale when the
# page yields no tweet containers at all.
results = soup.find_all('div', class_="js-tweet-text-container")
mars_weather = None
for result in results:
    tweet_text = result.text.strip()
    if tweet_text.startswith("Sol "):
        mars_weather = tweet_text
        break

mars_weather

'Sol 2171 (2018-09-14), high -12C/10F, low -65C/-84F, pressure at 8.79 hPa, daylight 05:43-17:59'

## Get Table of Mars Facts

In [12]:
# Page whose HTML tables are read with pd.read_html in the next cell.
url = 'https://space-facts.com/mars/'

In [13]:
# pd.read_html returns a list of DataFrames, one per table it parses from the URL.
tables = pd.read_html(url)
# Keep the first table in that list.
mars_table = tables[0]
mars_table


Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [14]:
# Use the fact labels (column 0) as the row index, then render the frame as HTML.
# The indexed frame is derived from tables[0] rather than mutated with
# inplace=True: the in-place form fails with KeyError when the cell is run a
# second time (column 0 has already become the index) and also alters tables[0],
# which is the same object.
mars_table = tables[0].set_index(0)
mars_html_table = mars_table.to_html()

mars_html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>1</th>\n    </tr>\n    <tr>\n      <th>0</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n

In [15]:
# Collapse the rendered table onto one line: cut the string at every newline
# and glue the pieces back together with nothing in between.
mars_html_table = ''.join(mars_html_table.split('\n'))
mars_html_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>1</th>    </tr>    <tr>      <th>0</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

In [17]:
# Also write the table's HTML to the file mars_table.html (a relative path).
mars_table.to_html('mars_table.html')

## Get Photos of Mars Hemispheres

In [21]:
# Brute force method by going directly to hemisphere URL

def fetch_full_image_link(page_url):
    """Return the href of the first download link on a hemisphere detail page.

    Visits ``page_url`` with the shared ``browser``, parses the resulting HTML,
    and walks ``<div class="downloads">`` -> first ``<li>`` -> first ``<a>``.

    Args:
        page_url: Address of the hemisphere detail page to open.

    Returns:
        The value of the anchor's ``href`` attribute.

    Raises:
        ValueError: If any element of that chain is missing from the page.
    """
    browser.visit(page_url)
    page_soup = BeautifulSoup(browser.html, 'html.parser')

    # Walk the chain one step at a time so a missing element produces a clear
    # error instead of an AttributeError on None.
    node = page_soup.find('div', class_='downloads')
    for tag_name in ('li', 'a'):
        if node is None:
            break
        node = node.find(tag_name)
    if node is None:
        raise ValueError("No download link found on " + page_url)
    return node.get('href')


# One call per hemisphere page, in the same order as before; each link is printed
# as soon as it is retrieved.
cerberus_link = fetch_full_image_link(
    "https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced")
print(cerberus_link)

schiaparelli_link = fetch_full_image_link(
    "https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced")
print(schiaparelli_link)

syrtis_major_link = fetch_full_image_link(
    "https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced")
print(syrtis_major_link)

valles_marineris_link = fetch_full_image_link(
    "https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced")
print(valles_marineris_link)

http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg


In [32]:
# Actually getting the 4 images and looping through to get titles and urls

url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
url_base = "https://astrogeology.usgs.gov"
# Trailing word removed from each result heading to form the title. It is only
# removed when actually present, rather than blindly chopping nine characters.
title_suffix = " Enhanced"

# Parse the search-results page once; each <div class="item"> is one result.
browser.visit(url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
results = soup.find_all('div', class_="item")

# Rebuilt from scratch on every run, so re-executing the cell does not
# accumulate duplicate entries.
hemisphere_images_urls = []
for result in results:
    desc = result.find('h3').text.strip()
    if desc.endswith(title_suffix):
        desc = desc[:-len(title_suffix)]

    # The result's first anchor holds a site-relative link to the detail page.
    detail_url = url_base + result.find('a').get('href')

    # Loop-local names are used for the detail page so the listing page's
    # `url`, `html` and `soup` are not overwritten while iterating.
    browser.visit(detail_url)
    detail_soup = BeautifulSoup(browser.html, 'html.parser')
    downloads = detail_soup.find('div', class_='downloads')
    # First <a> inside the first <li> of the downloads box.
    image_link = downloads.find('li').find('a').get('href')

    hemisphere_images_urls.append({"title": desc, "img_url": image_link})

hemisphere_images_urls


[{'title': 'Cerberus Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

## Create a Python Dictionary of All Mars Data

In [41]:
# Gather the values produced by the scraping cells above into a single dict.
mars_data = {
    # Title and teaser text of the first news article.
    'news_title': news_title,
    'news_p': news_p,
    # Full URL built from the JPL page's data-fancybox-href attribute.
    'featured_image_url': featured_image_url,
    # Text of the first tweet found starting with "Sol ".
    'mars_weather': mars_weather,
    # Facts table rendered as an HTML string with newlines removed.
    'mars_html_table': mars_html_table,
    # List of {"title", "img_url"} dicts, one per hemisphere search result.
    'hemisphere_images_urls': hemisphere_images_urls
}

mars_data

{'news_title': 'Update on Opportunity Rover after Martian Dust Storm',
 'news_p': "One month since increasing their commanding frequency, engineers have yet to hear from NASA's Opportunity rover.",
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA22719_hires.jpg',
 'mars_weather': 'Sol 2171 (2018-09-14), high -12C/10F, low -65C/-84F, pressure at 8.79 hPa, daylight 05:43-17:59',
 'mars_html_table': '<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>1</th>    </tr>    <tr>      <th>0</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>

## Create a Mongo Database with the Mars Data stored in it

In [45]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

# Database `mars_db` and collection `marsdata`, selected by attribute access.
db = client.mars_db
collection = db.marsdata

# insert_one adds an `_id` key to the dict it is handed. A shallow copy is
# inserted so `mars_data` itself stays unchanged; otherwise re-running this cell
# would resend the same `_id` and fail with DuplicateKeyError.
# Note: each run still adds one new document to the collection.
collection.insert_one(dict(mars_data))


<pymongo.results.InsertOneResult at 0x10caf5fc8>

## vvv Example Code vvv

In [None]:
# Visit the following URL
# This navigates the shared `browser` session to the Wikipedia page, replacing
# whatever page was loaded before.
url = "https://en.wikipedia.org/wiki/Mars"
browser.visit(url)

In [None]:
# Design an XPATH selector to grab the "Mars in natural color in 2007" image on the right
# NOTE(review): this selector targets an element with id "product-section";
# confirm it actually matches something on the Wikipedia page visited above.
xpath = '//*[@id="product-section"]/div[2]/div[1]/a/img'

In [None]:
# Use splinter to Click the "Mars in natural color in 2007" image
# to bring up the full resolution image
results = browser.find_by_xpath(xpath)
# NOTE(review): results[0] assumes the XPath matched at least one element.
img = results[0]
# The click is commented out, so this cell only locates the element.
# img.click()

In [None]:
# # Scrape the browser into soup and use soup to find the full resolution image of mars
# # Save the image url to a variable called `img_url`
# browser.is_element_present_by_css("img.jpg", wait_time=1)
# html = browser.html
# soup = BeautifulSoup(html, 'html.parser')
# img_url = soup.find("img", class_="jpg")["src"]
# img_url
# if "http:" not in img_url: img_url = "http:"+img_url

In [None]:
# BONUS

# # Use the requests library to download and save the image from the `img_url` above
# import requests
# import shutil
# response = requests.get(img_url, stream=True)
# with open('img.png', 'wb') as out_file:
#     shutil.copyfileobj(response.raw, out_file)

In [None]:
# # Display the image with IPython.display
# from IPython.display import Image
# Image(url='img.png')