In [53]:
# Dependencies / Modules
from bs4 import BeautifulSoup as bs
import pymongo
from splinter import Browser
import requests
import time
import pandas as pd
import json

In [54]:
# Set up splinter browser
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless = False)

# Visit url
url = "https://mars.nasa.gov/news/"
browser.visit(url)

# Pull html text and parse
html_code = browser.html
soup = BeautifulSoup(html_code, "html.parser")

## NASA Mars News

In [55]:
# Grab news title and paragraph text
news_title = soup.find('div', class_ = "bottom_gradient").text
news_p = soup.find('div', class_="rollover_description_inner").text

## JPL Mars Space Images - Featured Image

In [43]:
# Featured image url 
jpl_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(jpl_url)

# navigate to link
browser.click_link_by_partial_text('FULL IMAGE')
time.sleep(10)
browser.click_link_by_partial_text('more info')

# get html code once at page
image_html = browser.html

# parse
soup = BeautifulSoup(image_html, "html.parser")

# find path and make full path
image_path = soup.find('figure', class_='lede').a['href']
featured_image_url = "https://www.jpl.nasa.gov/" + image_path


## Mars Weather

In [44]:
marsweather_url = "https://twitter.com/marswxreport?lang=en"
browser.visit(marsweather_url)

weather_html = browser.html

soup = BeautifulSoup(weather_html, 'html.parser')

mars_weather = soup.find('p', class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text

## Mars Facts

In [45]:
# mars facts url and splinter visit
facts_url = "https://space-facts.com/mars/"
browser.visit(facts_url)

# get html
facts_html = browser.html

soup = BeautifulSoup(facts_html, 'html.parser')

# get the entire table
table_data = soup.find('table', class_="tablepress tablepress-id-mars")

#find all instances of table row
table_all = table_data.find_all('tr')

# set up lists to hold td elements which alternate between label and value
labels = []
values = []

# for each tr element append the first td element to labels and the second to values
for tr in table_all:
    td_elements = tr.find_all('td')
    labels.append(td_elements[0].text)
    values.append(td_elements[1].text)
    
# make a data frame and view
mars_facts_df = pd.DataFrame({
    "Label": labels,
    "Values": values
})

# mars facts dataframe
mars_facts_df

# get html code for DataFrame
fact_table = mars_facts_df.to_html(header = False, index = False)
fact_table

'<table border="1" class="dataframe">\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km\\n</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km\\n</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)\\n</td>\n    </tr>\n    <tr>\n      <td>Surface Temperature:</td>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Recorded By:</td>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

## Mars Hemispheres

In [48]:
# Visit website
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url)

time.sleep(1)

# Scrape page into Soup
html = browser.html
soup = bs(html, "html.parser")

sources = soup.find_all("a",{"class":"itemLink product-item"})
links = set(["https://astrogeology.usgs.gov" + link["href"] for link in sources])

hemisphere_image_urls = []
for link in links:
    browser.visit(link)
    time.sleep(1)
    
    html = browser.html
    soup = bs(html, "html.parser")
    
    link_dict = {}
    img_link = soup.find('img',{"class":"wide-image"})["src"]
    title = soup.find('h2', {"class":"title"}).text
    link_dict["title"] = title.split(" Enhanced",1)[0]
    link_dict["img_link"] = "https://astrogeology.usgs.gov" + img_link
    
    hemisphere_image_urls.append(link_dict)

In [52]:
# Store data in a dictionary
mars_data = {
    "news_title": news_title,
    "news_p": news_p,
    "featured_image_url": featured_image_url,
    "mars_weather": mars_weather,
    "html_table": fact_table,
    "hemisphere_image_urls": hemisphere_image_urls
}

# Close the browser after scraping
browser.quit()
print(json.dumps(mars_data, indent =4))

{
    "news_title": "Curiosity Tastes First Sample in 'Clay-Bearing Unit'",
    "news_p": "This new region on Mars might reveal more about the role of water on Mount Sharp.",
    "featured_image_url": "https://www.jpl.nasa.gov//spaceimages/images/largesize/PIA18273_hires.jpg",
    "mars_weather": "InSight sol 137 (2019-04-16) low -97.0\u00baC (-142.7\u00baF) high -15.9\u00baC (3.4\u00baF)\nwinds from the SW at 4.3 m/s (9.7 mph) gusting to 12.4 m/s (27.7 mph)\npressure at 7.30 hPapic.twitter.com/jhaMb7q9gy",
    "html_table": "<table border=\"1\" class=\"dataframe\">\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km\\n</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km\\n</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.52 AU