# Mission to Mars

In [14]:
#import dependencies
from urllib.parse import urljoin

import pandas as pd
import pymongo
import requests
from bs4 import BeautifulSoup
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

# Database Setup

In [2]:
#connect to the mongodb server running on this machine (default port)
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [3]:
#pick the "mars_db" database and its "news" collection by name
db = client['mars_db']
collection = db['news']

# News Scraping

In [4]:
#define url to scrape for news
news_url = 'https://mars.nasa.gov/news/'

#get the page; the timeout keeps the cell from hanging forever on a stalled connection
news_response = requests.get(news_url, timeout=30)
#stop here on a 4xx/5xx answer instead of parsing an error page as if it were the news
news_response.raise_for_status()
#make beautifulsoup object
news_soup = BeautifulSoup(news_response.text, 'lxml')

In [5]:
#collect every tag of the parsed page and dump it so the class names can be inspected
news_results = news_soup.find_all(True)
print(news_results)

[<html lang="en" xml:lang="en" xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<!-- Always force latest IE rendering engine or request Chrome Frame -->
<meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
<!-- Responsiveness -->
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
<!-- Favicon -->
<link href="/apple-touch-icon.png" rel="apple-touch-icon" sizes="180x180"/>
<link href="/favicon-32x32.png" rel="icon" sizes="32x32" type="image/png"/>
<link href="/favicon-16x16.png" rel="icon" sizes="16x16" type="image/png"/>
<link href="/manifest.json" rel="manifest"/>
<link color="#e48b55" href="/safari-pinned-tab.svg" rel="mask-icon"/>
<meta content="#000000" name="theme-color"/>
<meta content="authenticity_token" name="csrf-param"/>
<meta content="UTY+5q/EBZ+D9YqKP/yTX6R8O6k/lQ0qkXD5CFb+YNQceIk/KlPPgAC7o/ottDX3fa+qwWMb1fkigrbjAuHfdg==" name="csrf-token"/>
<title>News  – NASA’s Mars Exploratio

In [6]:
#find the first title and paragraph tags (classes pulled from inspecting code above)
news_title_tag = news_soup.find(class_='content_title')
news_p_tag = news_soup.find(class_='rollover_description_inner')

#find() returns None when nothing matches; fail with a clear message instead of
#an AttributeError on `.text`
if news_title_tag is None or news_p_tag is None:
    raise ValueError(
        "could not find 'content_title' / 'rollover_description_inner' in the news page"
    )

#drop the newlines/indentation that surround the text in the page markup
news_title = news_title_tag.text.strip()
news_p = news_p_tag.text.strip()

# Featured Mars Image

In [7]:
#start a visible (non-headless) chrome window driven by splinter
#ChromeDriverManager().install() supplies the path of the chromedriver to use
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', headless=False, **executable_path)



Current google-chrome version is 91.0.4472
Get LATEST driver version for 91.0.4472
Driver [C:\Users\Kaylie\.wdm\drivers\chromedriver\win32\91.0.4472.19\chromedriver.exe] found in cache


In [8]:
#define url to scrape for images
mars_img_url = 'https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.html'
#open the mars image site in the splinter-controlled browser
#(requires the `browser` created in the set-up cell to still be open)
browser.visit(mars_img_url)

In [9]:
#grab the html of the page currently loaded in the browser
html = browser.html
#parse it with python's built-in html parser so it can be searched
img_soup = BeautifulSoup(html, 'html.parser')

In [10]:
#pull the featured image anchor (class pulled from inspecting the url)
featured_image = img_soup.find('a', class_='showimg fancybox-thumbs')

#find() returns None when the anchor is missing; fail with a clear message
#instead of a TypeError on the subscript below
if featured_image is None:
    raise ValueError("could not find the 'showimg fancybox-thumbs' link on the image page")

featured_image_link = featured_image['href']
#resolve the link against the page it was scraped from rather than repeating the
#base url by hand; urljoin also copes with an absolute href
featured_image_url = urljoin(mars_img_url, featured_image_link)

In [11]:
#close browser (the remaining cells fetch pages with requests/pandas, not splinter)
browser.quit()

# Mars Facts

In [12]:
#mars facts url (page whose html tables are read with pandas below)
facts_url = 'https://space-facts.com/mars/'

In [15]:
#read the content from the facts_url
#read_html returns a list of dataframes, one per table it finds on the page
fact_table = pd.read_html(facts_url)
fact_table

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:     -87 to -5 °C      -88 to 58°C,
           

In [16]:
#pull just the first table of mars facts (the second one compares mars with earth)
#note: this is a reference to fact_table[0], not a copy
facts_df = fact_table[0]
facts_df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [23]:
#rename the columns and re-index the df
#build a new frame from the raw table instead of mutating it in place: the cell
#can then be re-run without a KeyError on 'Fact', and fact_table[0] stays untouched
facts_df = (
    fact_table[0]
    .rename(columns={0: 'Fact', 1: 'Description'})
    .set_index('Fact')
)
facts_df

Unnamed: 0_level_0,Description
Fact,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [24]:
#convert df to an html table
#to_html() returns the markup as one string, with a newline after each tag
facts_html_table = facts_df.to_html()
facts_html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Description</th>\n    </tr>\n    <tr>\n      <th>Fact</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n

In [28]:
#remove unnecessary new lines
#str.replace returns a new string, so the result must be assigned back or
#facts_html_table would keep its newlines
facts_html_table = facts_html_table.replace('\n', '')
facts_html_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Description</th>    </tr>    <tr>      <th>Fact</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-87 to -5 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

# Mars Hemispheres

In [32]:
#every hemisphere page sits under the same usgs path and differs only in its last segment
usgs_base_url = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/'
cerberus_url = usgs_base_url + 'cerberus_enhanced'
schiaparelli_url = usgs_base_url + 'schiaparelli_enhanced'
syrtis_major_url = usgs_base_url + 'syrtis_major_enhanced'
valles_marineris_url = usgs_base_url + 'valles_marineris_enhanced'

#gather the page urls in a fixed order so they can be looped over
hemisphere_urls = [
    cerberus_url,
    schiaparelli_url,
    syrtis_major_url,
    valles_marineris_url,
]

In [37]:
#list to hold image urls (reset on every run so re-running the cell does not duplicate entries)
hemi_img_urls = []

#iterate through url list
for hemisphere in hemisphere_urls:
    #get the page; time out rather than hang, and stop on a 4xx/5xx answer
    response = requests.get(hemisphere, timeout=30)
    response.raise_for_status()
    #make beautifulsoup object
    soup = BeautifulSoup(response.text, 'lxml')
    #first anchor with an href nested under div.downloads -> ul -> li
    #(structure found by inspecting the urls)
    link = soup.select_one('div.downloads ul li a[href]')
    #select_one returns None when nothing matches; name the page that failed
    if link is None:
        raise ValueError('no download link found on ' + hemisphere)
    href = link['href']
    #append href to hemi_img_urls list
    hemi_img_urls.append(href)

In [39]:
#hemisphere titles, listed in the same order as the urls were scraped
hemisphere_titles = [
    'Cerberus Hemisphere',
    'Schiaparelli Hemisphere',
    'Syrtis Major Hemisphere',
    'Valles Marineris Hemisphere',
]

#pair each title with the image url found at the same position
hemi_img_url_dicts = [
    {'title': title, 'img_url': hemi_img_urls[position]}
    for position, title in enumerate(hemisphere_titles)
]