# Mission to Mars | Step 2 | MongoDB and Flask Application

### Import Dependencies

In [None]:
# Jupyter Notebook Conversion to Python Script
#################################################

In [1]:
# Import dependencies and setup

from splinter import Browser
from bs4 import BeautifulSoup
import pandas as pd
import datetime as dt
import re
import time
import requests

In [2]:
# Point splinter at the local chromedriver binary and open a visible
# (non-headless) Chrome window for interactive use.

executable_path = dict(executable_path='/usr/local/bin/chromedriver')
browser = Browser('chrome', headless=False, **executable_path)

In [None]:
#################################################
# NASA Mars News
#################################################

In [3]:
# NASA Mars News Site Web Scraper
def mars_news(browser):
    """Scrape the latest headline and teaser from the NASA Mars News site.

    Args:
        browser: A splinter ``Browser`` used to load and render the page.

    Returns:
        A ``(news_title, news_paragraph)`` tuple of strings, or
        ``(None, None)`` when the expected elements are not in the page.
    """
    # Visit the NASA Mars News Site
    url = "https://mars.nasa.gov/news/"
    browser.visit(url)

    # Give the first list item up to half a second to appear. The boolean
    # result is ignored on purpose: a missing element is handled below.
    browser.is_element_present_by_css("ul.item_list li.slide", wait_time=0.5)

    news_soup = BeautifulSoup(browser.html, "html.parser")

    # The article of interest is the first match for:
    #   <ul class="item_list">
    #     <li class="slide">
    try:
        slide_element = news_soup.select_one("ul.item_list li.slide")
        news_title = slide_element.find("div", class_="content_title").get_text()
        news_paragraph = slide_element.find("div", class_="article_teaser_body").get_text()
    except AttributeError:
        # select_one()/find() return None when nothing matches, so the
        # chained call fails with AttributeError; report "no data" instead.
        return None, None
    return news_title, news_paragraph

In [None]:
#################################################
# JPL Mars Space Images - Featured Image
#################################################

In [None]:
# NASA JPL (Jet Propulsion Laboratory) Site Web Scraper
def featured_image(browser):
    """Scrape the URL of the featured Mars image from the NASA JPL site.

    Args:
        browser: A splinter ``Browser`` used to load and navigate the page.

    Returns:
        The absolute image URL as a string, or ``None`` when the image
        element (or its ``src`` attribute) cannot be found.
    """
    # Visit the NASA JPL (Jet Propulsion Laboratory) Site
    url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
    browser.visit(url)

    # Click the element whose id is "full_image" to open the featured image.
    full_image_button = browser.find_by_id("full_image")
    full_image_button.click()

    # Wait up to one second for the "more info" link, then follow it.
    browser.is_element_present_by_text("more info", wait_time=1)
    more_info_element = browser.find_link_by_partial_text("more info")
    more_info_element.click()

    # Parse the resulting page and locate the lead image.
    image_soup = BeautifulSoup(browser.html, "html.parser")
    img = image_soup.select_one("figure.lede a img")
    img_url = img.get("src") if img is not None else None
    if not img_url:
        # Either the <img> is missing or it has no usable src; without this
        # check a missing src would produce the bogus URL "...govNone".
        return None

    # The src is site-relative, so prefix the base URL to make it absolute.
    return f"https://www.jpl.nasa.gov{img_url}"

In [None]:
#################################################
# Mars Weather
#################################################

In [None]:
# Mars Weather Twitter Account Web Scraper
def twitter_weather(browser):
    """Scrape the latest Mars weather report from the Mars Weather Twitter page.

    Args:
        browser: A splinter ``Browser`` used to load and render the page.

    Returns:
        The text of the first ``<span>`` whose text matches ``InSight sol``,
        or ``None`` when no such element is present in the page.
    """
    # Visit the Mars Weather Twitter Account
    url = "https://twitter.com/marswxreport?lang=en"
    browser.visit(url)

    # Wait BEFORE taking the HTML snapshot; sleeping after the snapshot (as
    # the code previously did) cannot change what gets parsed.
    time.sleep(5)
    weather_soup = BeautifulSoup(browser.html, "html.parser")

    # Weather tweets are recognised by the phrase "InSight sol" in their text.
    pattern = re.compile(r'InSight sol')
    weather_span = weather_soup.find('span', text=pattern)
    if weather_span is None:
        # No matching tweet: return None, as the other scrapers do.
        return None
    return weather_span.text

In [None]:
#################################################
# Mars Facts
#################################################

In [None]:
# Mars Facts Web Scraper
# Read the first HTML table found at `url` into a DataFrame using pandas.
# NOTE(review): `url` is not assigned at module level anywhere in the visible
# source (every other `url` is a local inside a scraper function), so unless
# it is defined elsewhere this line fails with NameError. The Mars Facts page
# URL needs to be defined before this call.
mars_facts = pd.read_html(url)[0]
print(mars_facts)
# Move the existing index into a regular column, then assign three column
# labels (assumes the table had exactly two columns before reset_index —
# TODO confirm).
mars_facts.reset_index(inplace=True)
mars_facts.columns=["ID", "Properties", "Mars"]
# Bare expression: displays the DataFrame in a notebook cell; has no effect
# when the file is run as a script.
mars_facts

In [None]:
#################################################
# Mars Hemispheres
#################################################

In [None]:
# Mars Hemispheres Web Scraper
def hemisphere(browser):
    """Scrape the title and sample-image URL of each Mars hemisphere.

    Args:
        browser: A splinter ``Browser`` used to load and navigate the pages.

    Returns:
        A list of dicts, one per hemisphere link found on the results page,
        each with the keys ``"img_url"`` and ``"title"``.
    """
    # Visit the USGS Astrogeology Science Center Site
    url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
    browser.visit(url)

    hemisphere_image_urls = []

    # Only the number of links is taken from this first lookup; the elements
    # themselves are looked up again inside the loop.
    links = browser.find_by_css("a.product-item h3")
    for item in range(len(links)):
        # Find the element again on each pass to avoid a stale element
        # exception after navigating away and back.
        browser.find_by_css("a.product-item h3")[item].click()

        # The "Sample" anchor's href is the image URL for this hemisphere.
        sample_element = browser.find_link_by_text("Sample").first

        # Named `entry` (not `hemisphere`) so the local does not shadow this
        # function's own name.
        entry = {
            "img_url": sample_element["href"],
            "title": browser.find_by_css("h2.title").text,
        }
        hemisphere_image_urls.append(entry)

        # Return to the results page for the next hemisphere.
        browser.back()
    return hemisphere_image_urls

In [None]:
# Helper Function
def scrape_hemisphere(html_text):
    """Extract a hemisphere's title and sample-image link from page HTML.

    Args:
        html_text: HTML source of a single hemisphere detail page.

    Returns:
        A dict with the keys ``"title"`` and ``"img_url"``. Both values are
        ``None`` if either the ``<h2 class="title">`` heading or the
        "Sample" anchor is missing.
    """
    soup = BeautifulSoup(html_text, "html.parser")
    heading = soup.find("h2", class_="title")
    anchor = soup.find("a", text="Sample")

    # All-or-nothing: a page missing either element yields no data at all.
    if heading is None or anchor is None:
        return {"title": None, "img_url": None}

    return {"title": heading.get_text(), "img_url": anchor.get("href")}

In [None]:
#################################################
# Scraping All
#################################################

In [None]:
def scrape_all(chromedriver_path="/Users/jorgesanchez/Downloads/chromedriver"):
    """Run every Mars scraper and collect the results in one dictionary.

    Args:
        chromedriver_path: Filesystem path to the chromedriver executable.
            Defaults to the path this function previously hard-coded.

    Returns:
        A dict with the keys ``news_title``, ``news_paragraph``,
        ``featured_image``, ``weather``, ``facts``, ``hemispheres`` and
        ``last_modified``.
    """
    executable_path = {"executable_path": chromedriver_path}
    browser = Browser("chrome", **executable_path, headless=False)
    try:
        news_title, news_paragraph = mars_news(browser)
        img_url = featured_image(browser)
        mars_weather = twitter_weather(browser)
        hemisphere_image_urls = hemisphere(browser)
        timestamp = dt.datetime.now()
    finally:
        # Always close the browser, even when a scraper raises, so a failed
        # run does not leave a Chrome process behind.
        browser.quit()

    # NOTE(review): `mars_facts` is the module-level pandas DataFrame; if this
    # dict is stored in MongoDB it may need converting first — confirm with
    # the consumer of this function.
    data = {
        "news_title": news_title,
        "news_paragraph": news_paragraph,
        "featured_image": img_url,
        "weather": mars_weather,
        "facts": mars_facts,
        "hemispheres": hemisphere_image_urls,
        "last_modified": timestamp,
    }
    return data

In [None]:
# When executed directly (not imported), run the full scrape and show the result.
if __name__ == "__main__":
    scraped = scrape_all()
    print(scraped)