In [1]:
# Dependencies
from splinter import Browser
from bs4 import BeautifulSoup
import pandas as pd
import re

In [2]:
# Setting up chromedriver
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

# Scraping

## Nasa Mars News 

In [153]:
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
browser.visit(url)

In [154]:
# HTML object
html = browser.html
# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')
# Retrieve all elements that contain news information
news = soup.find('div', class_='list_text')

# Use Beautiful Soup's find() method to navigate and retrieve attributes
# Get the news title
content_title = news.find('div', class_="content_title")
news_title = content_title.find('a').text

# Get the news paragraph text
news_p = news.find('div', class_="article_teaser_body").text

print(news_title)
print(news_p)

How NASA's Perseverance Mars Team Adjusted to Work in the Time of Coronavirus 
Like much of the rest of the world, the Mars rover team is pushing forward with its mission-critical work while putting the health and safety of their colleagues and community first.


## JPL Mars Space Images

In [155]:
# URL of page to be scraped
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [156]:
# HTML object
html = browser.html
# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')
# Retrieve all elements that contain the first mars picture information
mars_latest_pic = soup.find('li', class_="slide")

# Dissect the anchor which contains the img link
anchor = mars_latest_pic.find('a')
image_link = anchor['data-fancybox-href']
featured_image_url = ('https://www.jpl.nasa.gov/' + image_link)

print(featured_image_url)

https://www.jpl.nasa.gov//spaceimages/images/largesize/PIA23844_hires.jpg


## Mars Weather

In [157]:
# URL of page to be scraped
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)

In [160]:
# HTML object
html = browser.html
# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')
# Find latest (first) tweet using Regular Expression Patterns
mars_weather = soup.find("span", text=re.compile("InSight")).text

print(mars_weather)

InSight sol 500 (2020-04-22) low -93.8ºC (-136.8ºF) high -3.5ºC (25.6ºF)
winds from the WNW at 4.5 m/s (10.1 mph) gusting to 20.4 m/s (45.6 mph)
pressure at 6.70 hPa


## Mars Facts

In [161]:
# URL of page to be scraped
url = 'https://space-facts.com/mars/'

In [162]:
# Convert html table into pandas
mars_facts_tables = pd.read_html(url)
mars_facts_tables[0]

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [163]:
# Save table into dataframe
mars_facts_df = mars_facts_tables[0]

In [164]:
# Convert dataframe into html and removed new line (\n) in code
mars_fact_html_table = mars_facts_df.to_html()
mars_fact_html_table = mars_fact_html_table.replace('\n', '')
mars_fact_html_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>0</th>      <th>1</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <th>1</th>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>4</th>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <th>5</th>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>6</th>      <td>Surface Temperature:</td>      <td>-87 to -5 °C</td>    </tr>    <tr>      <th>7</th>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>8</th>      <td>Recorded By:</td>      <td>Egyptian astronomers</td>    </

## Mars Hemispheres

In [3]:
# URL of page to be scraped
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

In [4]:
# HTML object
html = browser.html
# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')
# Retrieve all elements that contain news information
results = soup.find_all('div', class_='item')

# Create empty list to store dictionary containing all img titles and urls
hemisphere_image_urls = []

# Iterate through each hemisphere link
for result in results:
    # Get url for each hemisphere result
    hemisphere_anchor = result.find('a')
    hemisphere_url = ('https://astrogeology.usgs.gov/') + hemisphere_anchor['href']

    # Go to hemisphere url
    browser.visit(hemisphere_url)
    # HTML object
    html = browser.html
    # Parse HTML with Beautiful Soup
    soup = BeautifulSoup(html, 'html.parser')

    # Get title for image
    title = soup.find('h2', class_='title').text
    title = title.rsplit(' ', 1)[0]
    
    # Get url for image
    downloads = soup.find('div', class_='downloads')
    list_tag = downloads.find('li')
    anchor_tag = list_tag.find('a')
    img_url = anchor_tag['href']
    
    mars_img_dict ={}
    mars_img_dict['title'] = title
    mars_img_dict['img_url'] = img_url
    hemisphere_image_urls.append(mars_img_dict)
    
    print(title)
    print(img_url)
    print('-------------------')

print(hemisphere_image_urls)

Cerberus Hemisphere
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg
-------------------
Schiaparelli Hemisphere
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg
-------------------
Syrtis Major Hemisphere
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg
-------------------
Valles Marineris Hemisphere
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg
-------------------
[{'title': 'Cerberus Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'t

In [19]:
mars_data = {
    "hemisphere_image_urls": hemisphere_image_urls
}

mars_data['hemisphere_image_urls'][0]['img_url']

'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'