In [3]:
from io import StringIO

import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser

In [4]:
# Start a visible (headless=False) Chrome session driven through splinter.
# The driver path is relative -- presumably resolved from the notebook's working directory; confirm.
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

#### Mars Recent Headlines

In [5]:
# Scrape the NASA Mars news listing for headline titles and teaser paragraphs.
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

html = browser.html
soup = BeautifulSoup(html, 'lxml')

# Every news entry is looked up as a <div class="list_text"> element.
space_data = soup.find_all('div', class_="list_text")

news_titles = []
news_p = []
for slide in space_data:
    title_link = slide.find('a')
    teaser = slide.find('div', class_='article_teaser_body')
    # find() returns None when an element is absent; skip incomplete entries
    # as a pair so news_titles and news_p stay index-aligned.
    if title_link is None or teaser is None:
        continue
    news_titles.append(title_link.text)
    news_p.append(teaser.text)

# Both counts should be equal.
print(len(news_titles))
print(len(news_p))

40
40


#### Featured Image

In [6]:
# Open the JPL space-images page filtered to Mars and parse the rendered HTML.
img_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(img_url)

site = browser.html
soup = BeautifulSoup(markup=site, features='lxml')

In [7]:
# Get the Mars featured image: follow carousel -> article -> footer -> link
# and read the image path from the link's data-fancybox-href attribute.

root_url = 'https://www.jpl.nasa.gov'

pg_header = soup.find_all('div', class_='carousel_items')

# Start from None so a failed scrape cannot fall through to an unbound name
# or to a stale img_path left in the kernel by another cell.
img_path = None
for item in pg_header:
    articles = item.find('article')
    footer = articles.find('footer') if articles is not None else None
    link = footer.find('a') if footer is not None else None
    if link is not None and link.has_attr('data-fancybox-href'):
        img_path = link['data-fancybox-href']
        print(img_path)

if img_path is None:
    raise ValueError("featured image link not found on the page; the page layout may have changed")

featured_image_url = root_url + img_path
print(featured_image_url)

/spaceimages/images/mediumsize/PIA20318_ip.jpg
https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA20318_ip.jpg


In [8]:
# Get all Mars images: collect the absolute URL of every slide on the page.
root_url = 'https://www.jpl.nasa.gov'
all_images = soup.find_all('li', class_="slide")

img_paths = []
for image in all_images:
    link = image.find('a')
    # Skip only the two expected cases (no anchor, or an anchor without the
    # fancybox attribute) instead of swallowing every exception.
    img_link = link.get('data-fancybox-href') if link is not None else None
    if img_link is None:
        continue
    img_path = root_url + img_link
    img_paths.append(img_path)

#### Mars Weather

In [9]:
# Mars Weather: load the Mars weather Twitter timeline so the latest tweet can be scraped.

tweet_site = 'https://twitter.com/marswxreport?lang=en'
browser.visit(tweet_site)

# tweet_site is rebound here: from this line on it holds the page HTML, not the URL.
tweet_site = browser.html
soup = BeautifulSoup(markup=tweet_site, features='lxml')



In [10]:
# Pull the text of the first tweet found inside the timeline list.
tweets = soup.find_all('ol', class_="stream-items js-navigable-stream")

# Start from None so an empty scrape shows nothing rather than raising
# NameError or displaying a stale value from an earlier run.
p_text = None
for tweet in tweets:
    section = tweet.find('div', class_="content")
    sect_loc = section.find('div', class_="js-tweet-text-container") if section is not None else None
    paragraph = sect_loc.find('p') if sect_loc is not None else None
    if paragraph is not None:
        p_text = paragraph.text

p_text

'InSight sol 136 (2019-04-14) low -97.3ºC (-143.2ºF) high -16.5ºC (2.4ºF)\nwinds from the WNW at 4.3 m/s (9.6 mph) gusting to 10.8 m/s (24.2 mph)\npressure at 7.30 hPapic.twitter.com/OJFvEUR8OR'

#### Mars Facts

In [11]:
# Mars Facts: scrape the facts table with BeautifulSoup into a {label: value} dict.

facts_site = 'https://space-facts.com/mars/'
browser.visit(facts_site)

# facts_site is rebound from the URL to the page HTML.
facts_site = browser.html
# Name the parser explicitly, as the other cells do; the generic 'html'
# feature string lets BeautifulSoup pick whichever HTML parser is installed.
soup = BeautifulSoup(facts_site, 'lxml')

profile = soup.find_all('article', id="post-17")


def _clean_cell(cell):
    """Return the cell's text with newlines and degree signs removed."""
    return cell.text.replace("\n", "").replace("°", "")


fact_label = []
value = []

for sect in profile:
    content = sect.find('div', class_="post-content")
    facts = content.find('table', class_="tablepress tablepress-id-mars") if content is not None else None
    # Leave the lists as they are when the expected table is not present.
    if facts is None:
        continue
    fact_label = [_clean_cell(fact) for fact in facts.find_all('td', class_="column-1")]
    value = [_clean_cell(fact) for fact in facts.find_all('td', class_="column-2")]

mars_table = dict(zip(fact_label, value))
mars_table

{'Equatorial Diameter:': '6,792 km',
 'Polar Diameter:': '6,752 km',
 'Mass:': '6.42 x 10^23 kg (10.7% Earth)',
 'Moons:': '2 (Phobos & Deimos)',
 'Orbit Distance:': '227,943,824 km (1.52 AU)',
 'Orbit Period:': '687 days (1.9 years)',
 'Surface Temperature: ': '-153 to 20 C',
 'First Record:': '2nd millennium BC',
 'Recorded By:': 'Egyptian astronomers'}

In [12]:
# Parse the same facts page with pandas instead of walking the tags by hand.
facts_table = soup.find_all('table')

# Wrap the markup in StringIO: newer pandas versions deprecate passing a
# literal HTML string to read_html.
mars_facts = pd.read_html(StringIO(str(facts_table)))

# Take the first parsed table, name its two columns and index it by fact label.
# A copy is used so the frame stored in mars_facts is not mutated.
mars_facts_df = mars_facts[0].copy()
mars_facts_df.columns = ['Fact', 'Values']
mars_facts_df = mars_facts_df.set_index('Fact', drop=True)

# to_dict() nests by column: {'Values': {fact label: value, ...}}
mars_table = mars_facts_df.to_dict()

# Print the inner {fact: value} mapping of each column (only 'Values' here).
for vals in mars_table.values():
    m_data = vals
    print(m_data)

{'Equatorial Diameter:': '6,792 km', 'Polar Diameter:': '6,752 km', 'Mass:': '6.42 x 10^23 kg (10.7% Earth)', 'Moons:': '2 (Phobos & Deimos)', 'Orbit Distance:': '227,943,824 km (1.52 AU)', 'Orbit Period:': '687 days (1.9 years)', 'Surface Temperature:': '-153 to 20 °C', 'First Record:': '2nd millennium BC', 'Recorded By:': 'Egyptian astronomers'}


#### Mars Hemispheres

In [13]:
# Obtain high-resolution images for each of Mars' hemispheres:
# start by loading the USGS Astrogeology search results page.

hemis_site = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemis_site)

# hemis_site is rebound from the URL to the page HTML.
hemis_site = browser.html
soup = BeautifulSoup(markup=hemis_site, features='lxml')


In [14]:
# Get the image name and detail-page URL for each hemisphere.
# Step 1: read the title and link from every <div class="item"> search result.
base_url = 'https://astrogeology.usgs.gov'
results = soup.find_all('div', class_="item")

hemi_nm = []
hemi_url = []
for result in results:
    anchor = result.find('div', class_="description").find('a', class_="itemLink product-item")
    # Read both values before appending so the two lists grow together.
    href, title = anchor['href'], anchor.find('h3').text
    hemi_url.append(base_url + href)
    hemi_nm.append(title)

print(hemi_nm)
print("------")
print(hemi_url)





['Cerberus Hemisphere Enhanced', 'Schiaparelli Hemisphere Enhanced', 'Syrtis Major Hemisphere Enhanced', 'Valles Marineris Hemisphere Enhanced']
------
['https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced', 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced', 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced', 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced']


In [15]:
# Step 2: visit each hemisphere page with splinter and scrape the URL of its
# full-size image.

img_base_url = 'https://astrogeology.usgs.gov'
large_img_url = []

for link in hemi_url:
    browser.visit(link)
    soup = BeautifulSoup(browser.html, 'lxml')
    # Find the image by its own class rather than through the wrapper div,
    # whose selector needed a trailing space to match.
    image_tag = soup.find('img', class_="wide-image")
    if image_tag is None or not image_tag.has_attr('src'):
        # Record exactly one entry per page so this list stays index-aligned
        # with hemi_nm when the two are zipped together later.
        print(f"No full-size image found at {link}")
        large_img_url.append(None)
        continue
    full_img_url = img_base_url + image_tag['src']
    large_img_url.append(full_img_url)
    print(full_img_url)

    
    

https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg


In [16]:
# Pair each hemisphere name with its full-size image URL (position by position).

hemisphere_img_urls = {name: img for name, img in zip(hemi_nm, large_img_url)}

hemisphere_img_urls

{'Cerberus Hemisphere Enhanced': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg',
 'Schiaparelli Hemisphere Enhanced': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg',
 'Syrtis Major Hemisphere Enhanced': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg',
 'Valles Marineris Hemisphere Enhanced': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}