## Mission to Mars - Web Scraping Using Python Libraries

### Section 1: Using BeautifulSoup / Splinter Libraries to Scrape NASA News

In [1]:
# import dependencies
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser
import os
import pandas as pd

In [2]:
# Windows: drive Chrome through the ChromeDriver binary named `chromedriver.exe`
executable_path = dict(executable_path='chromedriver.exe')
# headless=False keeps the browser window visible while the pages are scraped
browser = Browser('chrome', headless=False, **executable_path)

In [3]:
# open the NASA Mars news page in the Splinter-driven browser;
# the page html itself is read from `browser` afterwards
url = "https://mars.nasa.gov/news/"
browser.visit(url)

In [4]:
# grab the html of the page the browser is currently showing
html = browser.html
# parse it with BeautifulSoup (Python's built-in 'html.parser') so elements can be searched
soup = bs(html, 'html.parser')

In [5]:
# pull the first headline and the first teaser paragraph from the parsed news page
news_title = soup.find("div", {"class": "content_title"}).get_text()
news_paragraph = soup.find("div", {"class": "article_teaser_body"}).get_text()

In [6]:
# show the scraped headline and its teaser text
print(f"News Title: {news_title}")
print(f"Brief: {news_paragraph}")

News Title: Curiosity Tastes First Sample in 'Clay-Bearing Unit'
Brife: This new region on Mars might reveal more about the role of water on Mount Sharp.


### Section 2: JPL Mars Featured Image

In [7]:
# Splinter driver API reference: https://splinter.readthedocs.io/en/latest/api/driver-and-element-api.html
base_url = 'https://www.jpl.nasa.gov'
mars_img_url = base_url + '/spaceimages/?search=&category=Mars'
browser.visit(mars_img_url)

# Poll for up to 10 seconds until the button text is on the page, using the
# exact same text that is clicked below, and stop with a readable message if
# it never shows up instead of failing inside the click.
if not browser.is_text_present('FULL IMAGE', wait_time=10):
    raise RuntimeError(
        f"'FULL IMAGE' link not found on {mars_img_url} - the page layout may have changed"
    )
browser.click_link_by_partial_text('FULL IMAGE')

In [8]:
# capture the browser's current html (this cell runs after the 'FULL IMAGE' click)
img_html = browser.html
# parse with the lxml parser so the image element can be located
img_soup = bs(img_html, 'lxml')

In [9]:
# `src` of the first <img> inside the 'fancybox-inner' container (a site-relative path)
img_url = img_soup.find('div', {'class': 'fancybox-inner'}).find('img')['src']
img_url

'/spaceimages/images/mediumsize/PIA17175_ip.jpg'

In [10]:
# prefix the site root to turn the relative image path into an absolute url
featured_img_url = f"{base_url}{img_url}"
featured_img_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA17175_ip.jpg'

### Section 3: Mars Weather - Twitter Scraping

In [11]:
# open the Mars weather report Twitter account page (English locale via ?lang=en)
# in the browser; the weather text is extracted from its html afterwards
mars_weather_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(mars_weather_url)

In [12]:
# capture the html of the page currently loaded in the browser
mars_weather_html = browser.html

In [13]:
# parse the captured html with the lxml parser so tweets can be searched
mars_weather_soup = bs(mars_weather_html, 'lxml')

In [14]:
# Take the text of the first tweet paragraph on the page.
tweet = mars_weather_soup.find('p', class_='tweet-text')
if tweet is None:
    raise ValueError("No 'tweet-text' paragraph found - the Twitter page markup may have changed")

# Cut the text at the trailing media link. Splitting on the full
# "pic.twitter.com" marker (rather than just "pic") avoids truncating a tweet
# that merely contains those three letters inside a word. Line breaks inside
# the report are kept on purpose.
# NOTE(review): assumes the media link appears as "pic.twitter.com/..." in the tweet text - confirm.
mars_weather = tweet.text.split("pic.twitter.com")[0]
print(mars_weather)

InSight sol 137 (2019-04-16) low -97.0ºC (-142.7ºF) high -15.9ºC (3.4ºF)
winds from the SW at 4.3 m/s (9.7 mph) gusting to 12.4 m/s (27.7 mph)
pressure at 7.30 hPa


### Section 4: Mars Facts

In [15]:
# source page for the Mars facts table, which is read with pandas and exported as an html table
mars_facts_url = 'https://space-facts.com/mars/'

In [16]:
# pd.read_html returns a list of DataFrames, one per <table> it finds on the page
mars_facts_table = pd.read_html(mars_facts_url)
mars_facts_table

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [20]:
# Work on a copy of the first scraped table so that renaming the columns does
# not silently modify the raw result stored in `mars_facts_table`.
df = mars_facts_table[0].copy()
df.columns = ['Names', 'Value']
# Display the relabelled table as the cell output.
df

Unnamed: 0,Names,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [19]:
# use pandas to convert the facts table to an html table string;
# index=False leaves the row index out of the generated markup
# NOTE(review): the stored output for this cell shows only a 'Value' column, which
# looks like it was produced from a different state of `df` - re-run to confirm.
mars_facts_html = df.to_html(index=False)
mars_facts_html

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th>Value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

In [None]:
# Write the facts table to an html file using the same layout as
# `mars_facts_html` (index=False, so no row-index column and a single header row).
# The encoding is set explicitly because the table contains non-ASCII text
# such as '°C' and the platform default encoding is not guaranteed to be utf-8.
with open('mars_facts.html', 'w', encoding='utf-8') as fo:
    df.to_html(fo, index=False)

### Section 5: Mars Hemispheres - Practice Visiting Each Linked Page and Scraping the Image URL and Title

In [None]:
# USGS Astrogeology site root, reused later to build absolute links
hemi_base_url = 'https://astrogeology.usgs.gov'
# search-results page for the enhanced Mars hemisphere products
hemispheres_url = hemi_base_url + '/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemispheres_url)

In [None]:
# parse the search-results page currently shown in the browser
hemi_html = browser.html
hemi_soup = bs(hemi_html, 'html.parser')
# every <div class="item"> is treated as one search result to follow
items = hemi_soup.find_all('div', {'class': 'item'})
# accumulator for the scraped records; despite the name this is a list (of dicts)
hemi_img_dict = list()

In [None]:
# Start from an empty list every time this cell runs, so re-running it
# replaces the results instead of appending duplicates.
hemi_img_dict = []

for item in items:
    # Title and link of one search result.
    title = item.find('h3').text
    detail_path = item.find('a', class_='itemLink product-item')['href']

    # Follow the link and parse the page it leads to. Loop-specific names are
    # used so the `soup`, `img_html` and `img_url` variables created by the
    # earlier sections of this notebook are not overwritten.
    browser.visit(hemi_base_url + detail_path)
    detail_soup = bs(browser.html, 'html.parser')

    # The 'wide-image' <img> carries a site-relative src; make it absolute.
    wide_img_url = hemi_base_url + detail_soup.find('img', class_='wide-image')['src']

    hemi_img_dict.append(
        {
            "title": title,
            "img_url": wide_img_url
        }
    )

hemi_img_dict