In [1]:
# Dependencies
from splinter import Browser
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# Executable Path for Splinter
executable_path = {'executable_path': 'chromedriver.exe'}

In [3]:
# NASA Mars News ------------------------------------------------------------
# This website is Javascript-Rendered.  Thus it requires splinter, not just requests
url = 'https://mars.nasa.gov/news/'

In [4]:
# If Error From Page Not Rendering- Keep Trying 
keep_trying = True
tries = 0

while (keep_trying) & (tries < 20):
    
    # Visit url, get html, close
    browser = Browser('chrome', headless=False)
    browser.visit(url)
    html = browser.html
    browser.quit()
    
    # Create BeautifulSoup object; parse with 'html.parser'
    soup = BeautifulSoup(html, 'html.parser')
    
    try:
        # Get Title
        results = soup.find_all('div', class_="content_title")
        news_title = results[0].get_text()

        # Get Body
        results = soup.find_all('div', class_="article_teaser_body")
        news_p = results[0].get_text()

        keep_trying = False
    except:
        keep_trying = True
        tries += 1

if keep_trying:
    print('Tries Exceeded- Failure')
    news_title = 'Nasa Website Sucks'
    news_p = 'Why Does NASA Website HTML Structure Change With Same URL?'

In [5]:
print(news_title)
print(news_p)

NASA's Curiosity Rover Finds an Ancient Oasis on Mars
New evidence suggests salty, shallow ponds once dotted a Martian crater — a sign of the planet's drying climate.


In [6]:
# JPL Mars Space Images - Featured Image ------------------------------------------------------------
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

In [7]:
# Visit url, get html, close
browser = Browser('chrome', **executable_path, headless=False)
browser.visit(url)
html = browser.html
browser.quit()

In [8]:
# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(html, 'html.parser')

In [9]:
# Get Image Link
results = soup.find_all('a', class_="button fancybox")

In [10]:
featured_image_url  = 'https://www.jpl.nasa.gov/' + results[0]['data-fancybox-href']
featured_image_url 

'https://www.jpl.nasa.gov//spaceimages/images/mediumsize/PIA19952_ip.jpg'

In [11]:
# Mars Weather------------------------------------------------------------
url = 'https://twitter.com/marswxreport?lang=en'

In [12]:
# Visit url, get html, close
browser = Browser('chrome', **executable_path, headless=False)
browser.visit(url)
html = browser.html
browser.quit()

In [13]:
# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(html, 'html.parser')
# Drop Unneeded Tags
for a in soup("a"):
    a.decompose()

In [14]:
# Get Tweet Contents
results = soup.find_all('p', class_= 'tweet-text')
mars_weather = results[0].get_text()
mars_weather

'InSight sol 312 (2019-10-12) low -100.4ºC (-148.7ºF) high -26.0ºC (-14.8ºF)\nwinds from the SSW at 4.8 m/s (10.6 mph) gusting to 19.5 m/s (43.6 mph)\npressure at 7.20 hPa'

In [15]:
# Mars Facts------------------------------------------------------------
url = 'https://space-facts.com/mars/'

In [16]:
mars_df_list = pd.read_html(url)
mars_df_list

[  Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:    -153 to 20 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [17]:
# Get Second Table
mars_df = mars_df_list[1]
mars_df = mars_df.rename(columns={0:'Attribute',1:'Mars Fact'})
path = 'mars_facts.html'
mars_df.to_html(path, index=False)
mars_df_html = mars_df.to_html()

In [18]:
# Mars Hemispheres------------------------------------------------------------
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

In [19]:
# Visit url, get html, close
browser = Browser('chrome', **executable_path, headless=False)
browser.visit(url)
html = browser.html   
browser.quit()

In [20]:
# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')

# Retrieve all Links
results = soup.find_all('a', {'class', 'itemLink product-item'})

# Get List of Unique Links To Visit
links = []
for result in results:
    link = result.get('href')
    links.append('https://astrogeology.usgs.gov/' + link)

In [21]:
# Filter For Unique URLs
links = list(set(links))
links

['https://astrogeology.usgs.gov//search/map/Mars/Viking/cerberus_enhanced',
 'https://astrogeology.usgs.gov//search/map/Mars/Viking/syrtis_major_enhanced',
 'https://astrogeology.usgs.gov//search/map/Mars/Viking/schiaparelli_enhanced',
 'https://astrogeology.usgs.gov//search/map/Mars/Viking/valles_marineris_enhanced']

In [22]:
hemisphere_image_urls = [None]*len(links)

visits = 0
# Visit Each URL
for link in links:
    url = link
    hemisphere = link.split('/')
    hemisphere = hemisphere[len(hemisphere)-1]
    hemisphere = hemisphere.split('_')
    hemisphere_name = hemisphere[0]
    if len(hemisphere) > 2:
        hemisphere_name = hemisphere_name + ' ' + hemisphere[1]
    print(url)
    print(hemisphere_name)
    
    # Visit url, get html, close
    browser = Browser('chrome', **executable_path, headless=False)
    browser.visit(url)
    html = browser.html   
    browser.quit()
    
    # Parse HTML with Beautiful Soup
    soup = BeautifulSoup(html, 'html.parser')

    # Retrieve all Jpg Links
    results = soup.find_all('div', {'class', 'downloads'})
    
    # Within Results, Filter Until a Jpg Link
    for result in results:
        sub_result = result.find_all('li')[0]
        jpg_link = sub_result.find('a').get('href')
        print(jpg_link)
        
    # Create New Dictionary Entry
    hemisphere_image_url = {'title': hemisphere_name.capitalize() + ' Hemisphere',
                           'img_url': jpg_link}
    
    # Extend List
    hemisphere_image_urls[visits] = hemisphere_image_url
    visits += 1

https://astrogeology.usgs.gov//search/map/Mars/Viking/cerberus_enhanced
cerberus
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg
https://astrogeology.usgs.gov//search/map/Mars/Viking/syrtis_major_enhanced
syrtis major
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg
https://astrogeology.usgs.gov//search/map/Mars/Viking/schiaparelli_enhanced
schiaparelli
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg
https://astrogeology.usgs.gov//search/map/Mars/Viking/valles_marineris_enhanced
valles marineris
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg


In [23]:
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Syrtis major Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Valles marineris Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]