In [1]:
# Dependencies
import requests
from bs4 import BeautifulSoup
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
import pandas as pd

In [2]:
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [3]:
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/'

browser.visit(url)

In [4]:
# Create BeautifulSoup object; parse with 'lxml'
html = browser.html
soup = BeautifulSoup(html, 'lxml')

# Retreiving the unordered list with its class name(as latest news are present in an unordered list)
unorder_lst = soup.find('ul', class_='item_list')

# Retreiving the first list item (as this will be the latest news)
news_lst = unorder_lst.find('li')

#Retreiving the latest news title
title = news_lst.find('div',class_='content_title')

#Storing the latest news title in a variable(after stripping all the extra data)
news_title = title.text.strip()

#Retreiving & Storing the latest news teaser in a variable(after stripping all the extra data)
teaser = news_lst.find('div',class_='article_teaser_body')
news_p = teaser.text.strip()
news_p

"While separated by half a century, NASA's Apollo 11 and Mars 2020 missions share the same historic goal: returning samples to Earth."

In [5]:
#JPL site
jpl_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(jpl_url)

html = browser.html
soup = BeautifulSoup(html,'lxml')

featured_image = soup.find('div',class_='carousel_container')

featured_image_title = featured_image.find('footer')
featured_image_title

full_img = featured_image_title.find('a')['data-link']

full_img_url = 'https://www.jpl.nasa.gov' + full_img
# full_img_url

#Visiting the page that has the full image details
browser.visit(full_img_url)

html = browser.html
soup = BeautifulSoup(html,'lxml')

large_img = soup.find('figure',class_='lede')
 
large_img_url = large_img.find('a')['href']
# large_img_url

featured_image_url = 'https://www.jpl.nasa.gov' + large_img_url
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17462_hires.jpg'

In [17]:
#Mars Weather on Twitter
mars_twitter_url = "https://twitter.com/marswxreport?lang=en"
browser.visit(mars_twitter_url)

html = browser.html
soup = BeautifulSoup(html,'lxml')

#Retreiving p tag with specified class name
stream_items = soup.find('p',class_='tweet-text')

#Retreiving the text of p tag which is the first element in contents (2nd being the <a> tag i.e child tag of p)
mars_weather = stream_items.contents[0]
# stream_items

'I’d say a plutonium-238 powered RTG qualifies the Curiosity and Mars2020 rovers as alternative fuel vehicles. You can explore these and other missions, rockets and more with JPL’s Spacecraft AR for IOS and Androhttps://www.jpl.nasa.gov/apps/'

In [7]:
#Removing the newline (\n) character from the retreived text.
mars_weather = mars_weather.replace('\n',' ')
mars_weather

'I’d say a plutonium-238 powered RTG qualifies the Curiosity and Mars2020 rovers as alternative fuel vehicles. You can explore these and other missions, rockets and more with JPL’s Spacecraft AR for IOS and Androhttps://www.jpl.nasa.gov/apps/'

In [8]:
#Mars facts webpage
mars_facts_url = 'https://space-facts.com/mars/'

mars_facts_table = pd.read_html(mars_facts_url)
mars_facts_table

[  Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:    -153 to 20 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [9]:
#Fetching Mars Facts table
mars_facts_df = mars_facts_table[1]
mars_facts_df.columns = ['Facts','Values']
mars_facts_df

Unnamed: 0,Facts,Values
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [10]:
html_table = mars_facts_df.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Facts</th>\n      <th>Values</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd millennium BC</t

In [11]:
new_html_table = html_table.replace('\n','')
new_html_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Facts</th>      <th>Values</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <th>1</th>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>4</th>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <th>5</th>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>6</th>      <td>Surface Temperature:</td>      <td>-87 to -5 °C</td>    </tr>    <tr>      <th>7</th>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>8</th>      <td>Recorded By:</td>      <td>Egyptian astronomers</

In [12]:
#Mars Hemispheres (using class description and h3 text)
mars_hemispheres_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars' 

browser.visit(mars_hemispheres_url)

html = browser.html
soup = BeautifulSoup(html,'lxml')

mars_hemispheres = soup.find_all('div',class_='description')
# mars_hemispheres.prettify()

hemispheres_text = []
hemisphere_url_list = []

#Iterate through the results
for hemisphere in mars_hemispheres:
    #Error handling
    try:
        hemi_url = hemisphere.find('h3')
        hems = hemi_url.text.strip()
        hemispheres_text.append(hems)
        
        browser.click_link_by_partial_text(hems)
        
        html = browser.html
        soup = BeautifulSoup(html,'lxml')
        
        hemis_image = soup.find('div',id="wide-image")
        hemis_image_url = hemis_image.find('img',class_='wide-image')['src']
        
        hemisphere_url_list.append('https://astrogeology.usgs.gov'+ hemis_image_url)
        print('https://astrogeology.usgs.gov'+ hemis_image_url)

    except Exception as e:
#         print(e)
        a_href_val = hemisphere.find('a')['href']
        
        miss_url = 'https://astrogeology.usgs.gov'+ a_href_val
        
        browser.visit(miss_url)
    
        html = browser.html
        soup = BeautifulSoup(html,'lxml')
        
        hemis_image = soup.find('div',id="wide-image")
        hemis_image_url = hemis_image.find('img',class_='wide-image')['src']
        
        hemisphere_url_list.append('https://astrogeology.usgs.gov'+ hemis_image_url)
        print('https://astrogeology.usgs.gov'+ hemis_image_url)
        
# browser.click_link_by_partial_text('Schiaparelli Hemisphere Enhanced')
# html = browser.html
# soup = BeautifulSoup(html,'lxml')
# # hemisphere_url_list
# soup


https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg


#Mars Hemispheres using class item and img tag
mars_hemispheres_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars' 

browser.visit(mars_hemispheres_url)

html = browser.html
soup = BeautifulSoup(html,'lxml')

mars_hemispheres = soup.find_all('div',class_='item')

a_href_lst = []

for i in mars_hemispheres:
    try:
        hemi_href = i.find('a',class_='itemLink product-item')
#         a_href_lst.append(hemi_href)
#         print(i)
        hemi_hrefs = i.find('a',class_='itemLink product-item')['href']
        a_href_lst.append(hemi_hrefs)
#         browser.click_link_by_partial_href(hemi_hrefs)
#         i.click_link_by_href(hemi_href)
#         print(hemi_hrefs)
        variable = browser.find_link_by_partial_href(hemi_hrefs)
        print(hemi_hrefs)
#         variable.click()
    except ElementDoesNotExist:
        print("Scraping Complete")

# /search/map/Mars/Viking/cerberus_enhanced
# /search/map/Mars/Viking/schiaparelli_enhanced
# /search/map/Mars/Viking/syrtis_major_enhanced
# /search/map/Mars/Viking/valles_marineris_enhanced
# browser.click_link_by_partial_href('/search/map/Mars/Viking/cerberus_enhanced')

In [13]:
hemispheres_text


['Cerberus Hemisphere Enhanced',
 'Schiaparelli Hemisphere Enhanced',
 'Syrtis Major Hemisphere Enhanced',
 'Valles Marineris Hemisphere Enhanced']

In [14]:
hemisphere_url_list

['https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg']

In [15]:
#Adding the texts and links to a dictionary
hemisphere_image_urls = []

for hemi in range(len(hemispheres_text)):
    hem_dict = {
        "title":hemispheres_text[hemi],
        "img_url":hemisphere_url_list[hemi]
    }
    hemisphere_image_urls.append(hem_dict)

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]