In [56]:
# Dependencies
from bs4 import BeautifulSoup
from splinter import Browser
import requests
import pymongo
import pandas as pd

In [57]:
# Initialize PyMongo to work with MongoDB.
# `conn` is the connection URI (localhost, port 27017); `client` is the MongoClient
# built from it. NOTE(review): no cell visible in this section uses `client` yet.
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [58]:
# URL of the NASA Mars news page to be scraped.
# NOTE: `url` is rebound further down for other sites, so re-run this cell before
# re-fetching the news page.
url = 'https://mars.nasa.gov/news/'

In [59]:
# Retrieve page with the requests module.
# The timeout keeps the kernel from hanging forever on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')

In [60]:
# Every <div class="content_title"> found in the parsed news page.
title_results = soup.find_all('div', attrs={'class': 'content_title'})

In [61]:
# Raw text of the first matching element; surrounding newlines are left in place.
title_results[0].get_text()

"\n\nNASA's Mars 2020 Rover Closer to Getting Its Name\n\n"

In [62]:
# Every <div class="rollover_description_inner"> found in the parsed news page.
description_results = soup.find_all('div', attrs={'class': 'rollover_description_inner'})

In [63]:
# Raw text of the first matching element; surrounding newlines are left in place.
description_results[0].get_text()

"\n155 students from across the U.S. have been chosen as semifinalists in NASA's essay contest to name the Mars 2020 rover, and see it launch from Cape Canaveral this July.\n"

In [64]:
# Keyword arguments forwarded to splinter's Browser. The driver is given as a bare
# file name, so it is presumably looked up relative to the working directory or
# PATH -- confirm for your machine.
executable_path = dict(executable_path='chromedriver.exe')
# headless=False asks for a visible (non-headless) Chrome session.
browser = Browser('chrome', headless=False, **executable_path)

In [65]:
# Site root; the relative image path scraped below is appended to it.
url_top = 'https://www.jpl.nasa.gov'
# JPL space-images listing restricted to the Mars category.
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [66]:
# Click the element with id 'full_image'; the following cells read the resulting
# page HTML and look for the image inside the 'fancybox-inner' container.
browser.click_link_by_id('full_image')

In [67]:
# Snapshot of the browser's current HTML, taken after the click above.
image_html = browser.html

In [68]:
# Parse the captured HTML with the 'lxml' parser so it can be searched.
image_soup = BeautifulSoup(image_html, 'lxml')

In [69]:
# Locate the featured image inside the lightbox container(s).
image_div = image_soup.find_all('div', class_='fancybox-inner')

# Reset first: `img_path` is consumed by the next cell, and without this a value
# left over from an earlier run could silently be reused when nothing matches.
img_path = None
for image in image_div:
    img_tag = image.find('img')
    if img_tag is None or not img_tag.get('src'):
        # Container without a usable <img>; skip it rather than crash on None['src'].
        continue
    img_path = img_tag['src']
    print(img_path)

if img_path is None:
    raise ValueError(
        "No <img> with a src was found inside a 'fancybox-inner' div; "
        "the full-image view may not have finished loading."
    )

/spaceimages/images/mediumsize/PIA17357_ip.jpg


In [70]:
# Absolute image URL: the site root followed by the scraped relative path.
featured_image_url = ''.join((url_top, img_path))

In [71]:
# Display the assembled URL as a sanity check.
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA17357_ip.jpg'

In [72]:
## Mars Weather Tweet
mars_tweet_url = 'https://twitter.com/marswxreport?lang=en'
# Bounded wait plus an explicit status check, so a hung or failed request is
# reported here rather than as a confusing parse failure below.
tweet_response = requests.get(mars_tweet_url, timeout=30)
tweet_response.raise_for_status()

# Create BeautifulSoup object; parse with 'lxml'
tweet_soup = BeautifulSoup(tweet_response.text, 'lxml')
# The multi-class string is matched against the exact value of the class attribute.
tweets = tweet_soup.find_all('p', class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text")
if not tweets:
    # Fail with an explanation instead of a bare IndexError on tweets[0].
    raise ValueError(
        "No tweet paragraphs found at " + mars_tweet_url
        + "; the page markup may have changed or may require JavaScript."
    )
latest_weather_tweet = tweets[0].text


In [73]:
# Mars facts: read the HTML tables on the space-facts Mars page into a list of
# DataFrames and display the list.
# NOTE: this rebinds `url`, which earlier cells used for other sites.
url = 'https://space-facts.com/mars/'
mars_tables = pd.read_html(url)
mars_tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:    -153 to 20 °C      -88 to 58°C,
           

In [74]:
# Label the two unnamed columns of the first table in a single rename.
# rename() returns a new frame, so mars_tables[0] (already displayed above) is
# left untouched and this cell can be re-run safely.
mars_facts = mars_tables[0].rename(columns={0: 'Attribute', 1: 'Value'})
mars_facts.head()

Unnamed: 0,Attribute,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"


In [75]:
# Render the facts table as an HTML string; index=False leaves the row index out
# of the markup.
mars_facts_html_table = mars_facts.to_html(index=False)
mars_facts_html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th>Attribute</th>\n      <th>Value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Recorded By:</td>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

In [112]:
mars_hemispheres_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
mars_hemispheres_url_top = 'https://astrogeology.usgs.gov'
# Link text identifying each hemisphere on the results page, in the order scraped.
hemisphere_names = ['Cerberus', 'Schiaparelli', 'Syrtis Major', 'Valles Marineris']


def scrape_hemisphere(browser, link_text, results_url=mars_hemispheres_url,
                      base_url=mars_hemispheres_url_top):
    """Collect the title and image URL for one hemisphere.

    Visits ``results_url``, follows the link whose text contains ``link_text``,
    and reads the first ``<h2 class="title">`` and ``<img class="wide-image">``
    from the page that opens.

    Parameters:
        browser: splinter browser used for navigation.
        link_text: partial text of the hemisphere link to click.
        results_url: search-results page that lists the hemisphere links.
        base_url: site root prepended to the image's ``src`` attribute.

    Returns:
        dict with keys ``"title"`` and ``"img_url"``.

    Raises:
        ValueError: if the title or the image is missing from the page.
    """
    # Start from the results page every time so the link is always present.
    browser.visit(results_url)
    browser.click_link_by_partial_text(link_text)

    hemi_soup = BeautifulSoup(browser.html, 'lxml')
    hemi_title = hemi_soup.find('h2', class_='title')
    hemi_image = hemi_soup.find('img', class_='wide-image')
    if hemi_title is None or hemi_image is None:
        # Name the hemisphere instead of failing with a bare IndexError.
        raise ValueError(
            "Could not find the title or wide image for hemisphere link '"
            + link_text + "'."
        )

    return {"title": hemi_title.text, "img_url": base_url + hemi_image['src']}


hemi_browser = Browser('chrome', **executable_path, headless=False)
# Built in one expression, so re-running the cell replaces the list rather than
# appending duplicate entries to it.
hemisphere_list = [scrape_hemisphere(hemi_browser, name) for name in hemisphere_names]
# hemi_browser is left open, as before; call hemi_browser.quit() once finished.

In [116]:
# Final result: one {'title', 'img_url'} dict per hemisphere, in scrape order.
hemisphere_list

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]