# Web Scraping Homework - Mission to Mars

In [1]:
from bs4 import BeautifulSoup as bs
import pandas as pd
import requests
from splinter import Browser
import time

In [2]:
# Location of the ChromeDriver binary that splinter uses to drive Chrome.
# A raw string keeps the Windows backslashes literal: sequences such as "\P"
# and "\C" are invalid escapes in a normal string and trigger warnings on
# newer Python versions.
executable_path = {'executable_path': r'C:\ProgramData\ChromeDriver\chromedriver'}
# headless=False opens a visible browser window while scraping.
browser = Browser('chrome', **executable_path, headless=False)

## NASA Mars News

In [3]:
# url for scraping
mars_news_url = 'https://mars.nasa.gov/news/'
browser.visit(mars_news_url)

# Wait (up to 15 seconds) for the article list to be present BEFORE the HTML
# is captured. Sleeping after the snapshot has been taken cannot change what
# was captured, so the wait has to happen here.
# NOTE(review): presumably the list is filled in by script after page load,
# which is why a delay was needed at all - confirm against the live page.
browser.is_element_present_by_css('div.list_text', wait_time=15)

# HTML object
mars_news_html = browser.html

# Parse HTML with Beautiful Soup
mars_news_soup = bs(mars_news_html, 'html.parser')

In [4]:
# collect the latest News Title and Paragraph Text
mars_news_find = mars_news_soup.find("div", class_="list_text")

news_title = mars_news_find.find('div', class_='content_title').text

news_p = mars_news_find.find('div', class_='article_teaser_body').text 

print(news_title)
print(news_p)

5 Hidden Gems Are Riding Aboard NASA's Perseverance Rover
The symbols, mottos, and small objects added to the agency's newest Mars rover serve a variety of purposes, from functional to decorative.


## JPL Mars Space Images - Featured Image

In [5]:
# Base site and search path for the JPL Mars image gallery
jpl_base_url = 'https://www.jpl.nasa.gov/'
jpl_suffix_url = 'spaceimages/?search=&category=Mars'
jpl_img_url = ''.join((jpl_base_url, jpl_suffix_url))

# Open the gallery page in the automated browser
browser.visit(jpl_img_url)

# Snapshot of the page markup as currently loaded
jpl_img_html = browser.html

# Build a Beautiful Soup tree from that snapshot
jpl_img_soup = bs(jpl_img_html, 'html.parser')

In [6]:
# find the div class "carousel_container"
jpl_img_find = jpl_img_soup.find("div", class_="carousel_container")

# The featured image path sits inside the inline style of the article with
# class "carousel_item", in the form:  background-image: url('/path/to.jpg');
carousel_style = jpl_img_find.find("article", class_="carousel_item")["style"]

# Extract only the text between "url(" and the next ")", then drop the
# surrounding quotes/spaces and the leading slash. Unlike stripping characters
# from the whole attribute, this leaves the path itself untouched and ignores
# any other declarations that may be present in the style.
image_url = (
    carousel_style
    .split("url(", 1)[1]
    .split(")", 1)[0]
    .strip(" '\"")
    .lstrip("/")
)

# concatenate the base and img urls (jpl_base_url already ends with "/")
featured_image_url = jpl_base_url + image_url

print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA14417-1920x1200.jpg


## Mars Facts

In [7]:
# Page whose HTML tables hold the Mars facts
mars_facts_url = 'https://space-facts.com/mars/'

# read_html returns a list of DataFrames parsed from the page's tables
mars_facts_url_df = pd.read_html(io=mars_facts_url)

# Display the parsed tables for inspection
mars_facts_url_df

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:     -87 to -5 °C      -88 to 58°C,
           

In [8]:
# Take the first table of metrics. Work on a copy so that renaming the
# columns below does not also alter the frame stored in mars_facts_url_df
# (plain indexing returns the very same object that lives in the list).
mars_clean_fact_df = mars_facts_url_df[0].copy()

# rename columns, then use the fact label as the index
mars_clean_fact_df.columns = ["Fact", "Value"]
mars_clean_fact_df = mars_clean_fact_df.set_index("Fact")

mars_clean_fact_df

Unnamed: 0_level_0,Value
Fact,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [9]:
# Write the facts table as an HTML file in the Resources folder.
# A forward slash is used as the separator: it works on Windows as well as
# POSIX systems, and avoids the invalid "\m" escape sequence that the
# backslash form put inside a normal string literal.
mars_clean_fact_df.to_html('Resources/mars_facts.html')

## Mars Hemispheres

In [36]:
# USGS Astrogeology site root and the search query for enhanced Mars hemispheres
mars_hemi_base_url = 'https://astrogeology.usgs.gov'
mars_hemi_suffix_url = '/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

mars_hemi_url = ''.join((mars_hemi_base_url, mars_hemi_suffix_url))

# Load the search results page in the automated browser
browser.visit(mars_hemi_url)

# Snapshot of the page markup as currently loaded
mars_hemi_html = browser.html

# Build a Beautiful Soup tree from that snapshot
mars_hemi_soup = bs(mars_hemi_html, 'html.parser')

# Print the indented markup to inspect the page structure
pretty_markup = mars_hemi_soup.prettify()
print(pretty_markup)

<html lang="en">
 <head>
  <link href="//ajax.googleapis.com/ajax/libs/jqueryui/1.11.3/themes/smoothness/jquery-ui.css" rel="stylesheet" type="text/css"/>
  <title>
   Astropedia Search Results | USGS Astrogeology Science Center
  </title>
  <meta content="USGS Astrogeology Science Center Astropedia search results." name="description"/>
  <meta content="USGS,Astrogeology Science Center,Cartography,Geology,Space,Geological Survey,Mapping" name="keywords"/>
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <meta content="width=device-width, initial-scale=1, maximum-scale=1" name="viewport"/>
  <meta content="x61hXXVj7wtfBSNOPnTftajMsZ5yB2W-qRoyr7GtOKM" name="google-site-verification"/>
  <!--<link rel="stylesheet" href="http://fonts.googleapis.com/css?family=Open+Sans:400italic,400,bold"/>-->
  <link href="/css/main.css" media="screen" rel="stylesheet"/>
  <link href="/css/print.css" media="print" rel="styles

In [37]:
# Collect every div with class "item" from the search results page
mars_hemi_find = mars_hemi_soup.find_all("div", attrs={"class": "item"})
print(mars_hemi_find)

[<div class="item"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png"/></a><div class="description"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><h3>Cerberus Hemisphere Enhanced</h3></a><span class="subtitle" style="float:left">image/tiff 21 MB</span><span class="pubDate" style="float:right"></span><br/><p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p></div> <!-- end description --></div>, <div class="item"><a class="itemLink product-item" href="/search/map/Mars/Viking/schiaparelli_enhanced"><img alt="Schiaparelli Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/08eac6e22c07fb1fe72223a79252de20_schiapar

In [46]:
# One {"title", "img_url"} record is collected per search result
hemisphere_image_urls = []

for hemi in mars_hemi_find:
    # The <h3> text is the hemisphere title
    title = hemi.find('h3').get_text()

    # The product link's href is site-relative, so prefix it with the base url
    detail_link = hemi.find("a", class_="itemLink product-item")
    hemi_img_suffix_url = detail_link['href']
    hemi_img_url = mars_hemi_base_url + hemi_img_suffix_url

    # Open the linked page and parse its markup
    browser.visit(hemi_img_url)
    hemi_html = browser.html
    hemi_soup = bs(hemi_html, 'html.parser')

    # The first anchor inside the "downloads" div supplies the image url
    hemi_find = hemi_soup.find("div", class_="downloads")
    first_anchor = hemi_find.find("a")
    img_url = first_anchor["href"]

    # Store the record for this hemisphere
    hemisphere_image_urls.append(dict(title=title, img_url=img_url))

# Print each collected record on its own line
for images in hemisphere_image_urls:
    print(images)

{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}
{'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}
{'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}
{'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}
