## Importing dependencies

In [1]:
import os
import pandas as pd
from bs4 import BeautifulSoup as bs
import requests

In [2]:
#creating a function to create a soup object by taking in the URL
def make_soup(url, timeout=30):
    """Download a web page and parse it into a BeautifulSoup object.

    Parameters
    ----------
    url : str
        Address of the page to retrieve.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout ``requests.get`` may block indefinitely.

    Returns
    -------
    bs4.BeautifulSoup
        The response body parsed with Python's built-in "html.parser".

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    requests.RequestException
        For connection problems or when the timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error responses instead of silently parsing an error page.
    response.raise_for_status()
    return bs(response.text, "html.parser")

### scraping the main news title and its text

In [3]:
#Mars News site: "https://mars.nasa.gov/news/"
soup1 = make_soup("https://mars.nasa.gov/news/")

#the first "slide" div is the element picked out by inspecting the page
result1 = soup1.find('div', class_="slide")
if result1 is None:
    raise ValueError('no <div class="slide"> element found on the Mars news page')

#search the anchors once: the first one holds the teaser paragraph, the second the title
slide_links = result1.find_all("a")
if len(slide_links) < 2:
    raise ValueError("expected at least two links inside the news slide, found %d" % len(slide_links))

#required texts
news_title = slide_links[1].text.strip()
news_p = slide_links[0].text.strip()
print(news_title)
print(news_p)

Why This Martian Full Moon Looks Like Candy
For the first time, NASA's Mars Odyssey orbiter has caught the Martian moon Phobos during a full moon phase. Each color in this new image represents a temperature range detected by Odyssey's infrared camera.


### scraping the featured image from www.jpl.nasa.gov

In [4]:
#JPL space-images page filtered to Mars; the featured image url is extracted from this page
jpl_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

In [5]:
#importing splinter module to browse the page
from splinter import Browser
#chromedriver location: set the CHROMEDRIVER_PATH environment variable to override it,
#otherwise the original default location is used
executable_path = {'executable_path': os.environ.get('CHROMEDRIVER_PATH', '/usr/local/bin/chromedriver')}
browser = Browser('chrome', **executable_path, headless=False)
#open the JPL page in the browser
browser.visit(jpl_url)

In [6]:
try:
    #Click on the full image button
    browser.click_link_by_partial_text('FULL IMAGE')
    # HTML object (a plain string, so it stays usable after the browser is closed)
    html = browser.html
finally:
    #quit the open browser even when the click fails, so no Chrome process is left running
    browser.quit()
# Parse HTML with Beautiful Soup
soup = bs(html, 'html.parser')

In [7]:

#inline CSS of the first carousel item; expected to look like "background-image: url('/path/image.jpg');"
featureimg = soup.find("div", class_="carousel_items").find("article", class_="carousel_item")["style"]
#str.strip() removes a *set of characters* from both ends, not a prefix/suffix, so it can also
#eat leading/trailing characters of the path itself; take the text between "url(" and ")" instead
jplimgurl = featureimg.partition("url(")[2].partition(")")[0].strip(" '\"")
if not jplimgurl:
    raise ValueError("no url(...) found in carousel style: %r" % featureimg)
#since the above url didn't come with main domain, we're going to concatenate it with the main domain
main_domain ="https://www.jpl.nasa.gov"

feature_img_url = main_domain+jplimgurl
print(feature_img_url)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA19177-1920x1200.jpg


### scraping the Mars weather from its Twitter account

In [8]:
#Mars weather is read from the tweets on the marswxreport Twitter page
mars_weather_url = "https://twitter.com/marswxreport?lang=en"
soup3 = make_soup(mars_weather_url)


In [9]:
#first tweet paragraph on the page, i.e. the latest weather report
latest_weather = soup3.find("p",class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text")
if latest_weather is None:
    raise ValueError("no tweet text found - the Twitter page markup may have changed")
#cut the text at the trailing picture link instead of slicing off a fixed 26 characters,
#so a tweet without a picture is not truncated
#NOTE(review): assumes the picture url appears in the text as "pic.twitter.com/..." - confirm
mars_weather = latest_weather.text.partition("pic.twitter.com")[0]
print(mars_weather)

InSight sol 167 (2019-05-17) low -100.5ºC (-148.9ºF) high -20.4ºC (-4.6ºF)
winds from the SW at 4.7 m/s (10.6 mph) gusting to 13.5 m/s (30.3 mph)
pressure at 7.50 hPa


### scraping the mars facts table

In [31]:
#Mars facts scraping from "https://space-facts.com/mars/"
fact_table = pd.read_html("https://space-facts.com/mars/")
#copy the first parsed table so relabelling it does not silently modify fact_table[0]
mars_fact = fact_table[0].copy()
mars_fact.columns = ["Attribute","Value"]
#set_index returns a new frame; no inplace mutation needed
mars_fact = mars_fact.set_index(["Attribute"])
mars_fact.head()

Unnamed: 0_level_0,Value
Attribute,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"


In [37]:
#render the facts table as an HTML table string
mars_facthtml = mars_fact.to_html()

### scraping the hemisphere images from astrogeology.usgs.gov

In [12]:
#Hemisphere images from "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
soup4 = make_soup("https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars")
#preview only the beginning of the page; printing the whole document floods the notebook output
print(soup4.prettify()[:1500])

<!DOCTYPE html>
<html lang="en">
 <head>
  <link href="//ajax.googleapis.com/ajax/libs/jqueryui/1.11.3/themes/smoothness/jquery-ui.css" rel="stylesheet" type="text/css"/>
  <title>
   Astropedia Search Results | USGS Astrogeology Science Center
  </title>
  <meta content="USGS Astrogeology Science Center Astropedia search results." name="description"/>
  <meta content="USGS,Astrogeology Science Center,Cartography,Geology,Space,Geological Survey,Mapping" name="keywords"/>
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <meta content="width=device-width, initial-scale=1, maximum-scale=1" name="viewport"/>
  <meta content="x61hXXVj7wtfBSNOPnTftajMsZ5yB2W-qRoyr7GtOKM" name="google-site-verification"/>
  <!--<link rel="stylesheet" href="http://fonts.googleapis.com/css?family=Open+Sans:400italic,400,bold"/>-->
  <link href="/css/main.css" media="screen" rel="stylesheet"/>
  <link href="/css/print.css" media="pr

In [23]:
#all <h3> headings of the results page; each one holds a hemisphere name
hemispheres = soup4.find_all("h3")
#collect the plain text of every heading
hemisphere_list = []
for heading in hemispheres:
    hemisphere_list.append(heading.text)
hemisphere_list

['Cerberus Hemisphere Enhanced',
 'Schiaparelli Hemisphere Enhanced',
 'Syrtis Major Hemisphere Enhanced',
 'Valles Marineris Hemisphere Enhanced']

In [29]:
#Empty list for image urls
image_urls = []

#search-results page that links to every hemisphere (identical for all iterations)
hemi_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

#a single browser session is reused for all hemispheres instead of launching Chrome once per title
browser = Browser('chrome', **executable_path, headless=False)
try:
    #looping through the hemisphere list to click on the link with the text of each item
    for hemi_title in hemisphere_list:
        #go back to the results page before following the next hemisphere link
        browser.visit(hemi_url)
        browser.click_link_by_partial_text(hemi_title)
        html = browser.html
        soup = bs(html, 'html.parser')
        #first link that opens in a new tab; in the recorded output this is the .../full.jpg download
        img_url = soup.find('a', target="_blank")["href"]

        #creating dictionary of title and image url
        url_dict = {"title": hemi_title, "img_url": img_url}
        image_urls.append(url_dict)
finally:
    #always close the browser, even if a page or link is missing
    browser.quit()

In [30]:
#The list of image urls of Mars Hemispheres
image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]