In [5]:
# Import dependencies
# Splinter and BeautifulSoup
from splinter import Browser
from bs4 import BeautifulSoup as soup
from webdriver_manager.chrome import ChromeDriverManager
# Import Pandas
import pandas as pd

In [6]:
# Set executable path
executable_path = {'executable_path': ChromeDriverManager().install()}
# Set up the URL for scraping
browser = Browser('chrome', **executable_path, headless=False)



Current google-chrome version is 91.0.4472
Get LATEST driver version for 91.0.4472
Driver [/Users/jesssanchez/.wdm/drivers/chromedriver/mac64/91.0.4472.101/chromedriver] found in cache


### Visit the NASA Mars News Site

In [350]:
# Visit the mars nasa news site
url = 'https://redplanetscience.com'
browser.visit(url)

# Optional delay for loading the page
browser.is_element_present_by_css('div.list_text', wait_time=1)

True

In [301]:
# Set up the HTML parser
# Convert the browser html to a soup object and then quit the browser
html = browser.html
news_soup = soup(html, 'html.parser')

slide_elem = news_soup.select_one('div.list_text')

In [302]:
# Begin scraping
slide_elem.find('div', class_='content_title')

<div class="content_title">NASA-JPL Names 'Rolling Stones Rock' on Mars</div>

In [303]:
# Use the parent element to find the first 'a' tag and save it as 'news_title'
news_title = slide_elem.find('div', class_='content_title').get_text()
news_title

"NASA-JPL Names 'Rolling Stones Rock' on Mars"

In [304]:
# Use the parent element to find the paragraph text
news_p = slide_elem.find('div', class_='article_teaser_body').get_text()
news_p

"NASA's Mars InSight mission honored one of the biggest bands of all time at Pasadena concert."

### JPL Space Images Featured Image

In [305]:
# Visit URL
url = 'https://spaceimages-mars.com'
browser.visit(url)

In [306]:
# Find and click the full image button
full_image_elem = browser.find_by_tag('button')[1]
full_image_elem.click()

In [307]:
# Parse the resulting html with soup
html = browser.html
img_soup = soup(html, 'html.parser')

In [308]:
# Parse the resulting html with soup
html = browser.html
img_soup = soup(html, 'html.parser')
img_soup

<html class="fancybox-margin fancybox-lock"><head>
<meta charset="utf-8"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<link href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" rel="stylesheet"/>
<!-- <link rel="stylesheet" type="text/css" href="css/font.css"> -->
<link href="css/app.css" rel="stylesheet" type="text/css"/>
<link href="https://stackpath.bootstrapcdn.com/font-awesome/4.7.0/css/font-awesome.min.css" rel="stylesheet" type="text/css"/>
<title>Space Image</title>
<style type="text/css">.fancybox-margin{margin-right:0px;}</style></head>
<body>
<div class="header">
<nav class="navbar navbar-expand-lg">
<a class="navbar-brand" href="#"><img id="logo" src="image/nasa.png"/><span class="logo">Jet Propulsion Laboratory</span>
<span class="logo1">California Institute of Technology</span></a>
<button aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation" class="navbar-toggler" data-target="#navbarNav" data-to

In [309]:
# Find the relative image url
img_url_rel = img_soup.find('img', class_='fancybox-image').get('src')
img_url_rel

'image/featured/mars1.jpg'

In [310]:
# Use the base URL to create an absolute URL (add base URL)
img_url = f'https://spaceimages-mars.com/{img_url_rel}'
img_url

'https://spaceimages-mars.com/image/featured/mars1.jpg'

### Mars Facts

In [311]:
# Scrape table with Pandas
# Create a new DataFrame from the HTML table
df = pd.read_html('https://galaxyfacts-mars.com')[0]
df.head()

Unnamed: 0,0,1,2
0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"


In [312]:
# Assign columns to the new Data Frame
df.columns=['Description', 'Mars', 'Earth']

# Turn the Description column into the DataFrame's index
df.set_index('Description', inplace=True)

# Display DataFrame
df

Unnamed: 0_level_0,Mars,Earth
Description,Unnamed: 1_level_1,Unnamed: 2_level_1
Mars - Earth Comparison,Mars,Earth
Diameter:,"6,779 km","12,742 km"
Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons:,2,1
Distance from Sun:,"227,943,824 km","149,598,262 km"
Length of Year:,687 Earth days,365.24 days
Temperature:,-87 to -5 °C,-88 to 58°C


In [313]:
# Convert DataFrame back into HTML-ready code
df.to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Mars - Earth Comparison</th>\n      <td>Mars</td>\n      <td>Earth</td>\n    </tr>\n    <tr>\n      <th>Diameter:</th>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>Distance from Sun:</th>\n      <td>227,943,824 km</td>\n      <td>149,598,262 km</td>\n    </tr>\n    <tr>\n      <th>Length of Year:</th>\n      <td>687 Earth days</td>\n      <td>365.24 days</td>\n    </tr>\n    <tr>\n      <th>Temperature:</th>\n      <td>-87 to -5 °C</td>\n      <td>-88 to 58°C</td>\n    </tr>\n  </tbody>

# D1: Scrape High-Resolution Mars’ Hemisphere Images and Titles

### Hemispheres

In [147]:
# 1. Use browser to visit the URL 
url = 'https://marshemispheres.com/'
browser.visit(url)

In [148]:
# Parse the resulting html with soup
html = browser.html
img_soup = soup(html, 'html.parser')
print(img_soup.prettify())

<html lang="en">
 <head>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <link href="css/jquery-ui.css" rel="stylesheet" type="text/css"/>
  <title>
   Astropedia Search Results | GUSS Astrogeology Science Center
  </title>
  <meta content="GUSS Astrogeology Science Center Astropedia search results." name="description"/>
  <meta content="GUSS,Astrogeology Science Center,Cartography,Geology,Space,Geological Survey,Mapping" name="keywords"/>
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <meta content="width=device-width, initial-scale=1, maximum-scale=1" name="viewport"/>
  <link href="css/main.css" media="screen" rel="stylesheet"/>
  <link href="css/print.css" media="print" rel="stylesheet"/>
  <link href="#" rel="icon" type="image/x-ico"/>
 </head>
 <body id="results">
  <header>
   <a href="#" style="float:right;margin-top:10px;" target="_blank">
    <img alt="USGS: Science for a Changing World" class="logo" height="60" src="images/usgs_logo_main_

In [176]:
# 2. Create a list to hold the images and titles.
hemisphere_image_urls = []

In [17]:
img_soup.title

<title>Astropedia Search Results | GUSS Astrogeology Science Center</title>

In [18]:
img_soup.title.text

'Astropedia Search Results | GUSS Astrogeology Science Center'

In [34]:
img_soup.title.text.strip()

'Astropedia Search Results | GUSS Astrogeology Science Center'

In [19]:
img_soup.body

<body id="results">
<header>
<a href="#" style="float:right;margin-top:10px;" target="_blank">
<img alt="USGS: Science for a Changing World" class="logo" height="60" src="images/usgs_logo_main_2x.png"/>
</a>
<a href="#" style="float:right;margin-top:5px;margin-right:20px;" target="_blank">
<img alt="NASA" class="logo" height="65" src="images/nasa-logo-web-med.png"/>
</a>
</header>
<div class="wrapper">
<div class="container">
<div class="widget block bar">
<a href="https://astrogeology.usgs.gov/search" style="float:right;text-decoration:none;">
<img alt="Astropedia" src="images/astropedia-logo-main.png" style="width:200px;border:none;float:right;"/>
<div style="clear:both;font-size:.8em;float:right;color:#888;">Lunar and Planetary Cartographic Catalog</div>
</a>
<div style="float:left;height:60px;">
</div>
</div>
<div class="full-content">
<section class="block" id="results-accordian">
<div class="result-list" data-section="product" id="product-section">
<div class="accordian">
<h2>Pro

In [20]:
img_soup.body.text

'\n\n\n\n\n\n\n\n\n\n\n\n\n\nLunar and Planetary Cartographic Catalog\n\n\n\n\n\n\n\n\nProducts\n4 Results\nCollapse\n\n\n\n\n\n\nCerberus Hemisphere Enhanced\n\nimage/tiff 21 MB\nMosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…\n\n\n\n\n\n\n\nSchiaparelli Hemisphere Enhanced\n\nimage/tiff 35 MB\nMosaic of the Schiaparelli hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. The images were acquired in 1980 during early northern…\n\n\n\n\n\n\n\nSyrtis Major Hemisphere Enhanced\n\nimage/tiff 25 MB\nMosaic of the Syrtis Major hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of about 100 red and violet…\n\n\n\n\n\n\n\nValles Marineris Hemisphere Enhanced\n\nimage/tiff 27 MB\nMosaic of the V

In [22]:
img_soup.body.p.text

'Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…'

In [23]:
img_soup.body.find_all('p')

[<p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p>,
 <p>Mosaic of the Schiaparelli hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. The images were acquired in 1980 during early northern…</p>,
 <p>Mosaic of the Syrtis Major hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of about 100 red and violet…</p>,
 <p>Mosaic of the Valles Marineris hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. The distance is 2500 kilometers from the surface of…</p>]

In [35]:
img_soup.body.find('p')

<p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p>

In [26]:
img_soup.body.find_all('div')

[<div class="wrapper">
 <div class="container">
 <div class="widget block bar">
 <a href="https://astrogeology.usgs.gov/search" style="float:right;text-decoration:none;">
 <img alt="Astropedia" src="images/astropedia-logo-main.png" style="width:200px;border:none;float:right;"/>
 <div style="clear:both;font-size:.8em;float:right;color:#888;">Lunar and Planetary Cartographic Catalog</div>
 </a>
 <div style="float:left;height:60px;">
 </div>
 </div>
 <div class="full-content">
 <section class="block" id="results-accordian">
 <div class="result-list" data-section="product" id="product-section">
 <div class="accordian">
 <h2>Products</h2>
 <span class="count">4 Results</span>
 <span class="collapse">Collapse</span>
 </div>
 <div class="collapsible results">
 <div class="item">
 <a class="itemLink product-item" href="cerberus.html"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png"/></a>
 <div class="d

In [54]:
urls = img_soup.body.find_all('a')
for i in urls:
    print(i.text.strip())



Lunar and Planetary Cartographic Catalog

Cerberus Hemisphere Enhanced

Schiaparelli Hemisphere Enhanced

Syrtis Major Hemisphere Enhanced

Valles Marineris Hemisphere Enhanced
Back
Search
About
Contact
GUSS Science Center


In [62]:
img_soup.body.find('p').text

'Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…'

In [63]:
img_soup.body.find_all('p')

[<p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p>,
 <p>Mosaic of the Schiaparelli hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. The images were acquired in 1980 during early northern…</p>,
 <p>Mosaic of the Syrtis Major hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of about 100 red and violet…</p>,
 <p>Mosaic of the Valles Marineris hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. The distance is 2500 kilometers from the surface of…</p>]

In [64]:
img_soup.a.find()

<img alt="USGS: Science for a Changing World" class="logo" height="60" src="images/usgs_logo_main_2x.png"/>

In [155]:
results = img_soup.find_all('div', class_='item')
results

[<div class="item">
 <a class="itemLink product-item" href="cerberus.html"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png"/></a>
 <div class="description">
 <a class="itemLink product-item" href="cerberus.html">
 <h3>Cerberus Hemisphere Enhanced</h3>
 </a>
 <span class="subtitle" style="float:left">image/tiff 21 MB</span><span class="pubDate" style="float:right"></span><br/>
 <p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p>
 </div>
 <!-- end description -->
 </div>,
 <div class="item">
 <a class="itemLink product-item" href="schiaparelli.html"><img alt="Schiaparelli Hemisphere Enhanced thumbnail" class="thumb" src="images/08eac6e22c07fb1fe72223a79252de20_schiaparelli_enhanced.tif_thumb.png"/></a>
 <div class="description">
 <a class="ite

In [160]:
title = results.find_all('h3').text


AttributeError: ResultSet object has no attribute 'find_all'. You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?

In [151]:
results = img_soup.find_all('div', class_='item')

for result in results:
    title = result.find('h3')

[<div class="item">
 <a class="itemLink product-item" href="cerberus.html"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png"/></a>
 <div class="description">
 <a class="itemLink product-item" href="cerberus.html">
 <h3>Cerberus Hemisphere Enhanced</h3>
 </a>
 <span class="subtitle" style="float:left">image/tiff 21 MB</span><span class="pubDate" style="float:right"></span><br/>
 <p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p>
 </div>
 <!-- end description -->
 </div>,
 <div class="item">
 <a class="itemLink product-item" href="schiaparelli.html"><img alt="Schiaparelli Hemisphere Enhanced thumbnail" class="thumb" src="images/08eac6e22c07fb1fe72223a79252de20_schiaparelli_enhanced.tif_thumb.png"/></a>
 <div class="description">
 <a class="ite

In [120]:
results = img_soup.find_all('a', class_='itemLink product-item')
results

[<a class="itemLink product-item" href="cerberus.html"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png"/></a>,
 <a class="itemLink product-item" href="cerberus.html">
 <h3>Cerberus Hemisphere Enhanced</h3>
 </a>,
 <a class="itemLink product-item" href="schiaparelli.html"><img alt="Schiaparelli Hemisphere Enhanced thumbnail" class="thumb" src="images/08eac6e22c07fb1fe72223a79252de20_schiaparelli_enhanced.tif_thumb.png"/></a>,
 <a class="itemLink product-item" href="schiaparelli.html">
 <h3>Schiaparelli Hemisphere Enhanced</h3>
 </a>,
 <a class="itemLink product-item" href="syrtis.html"><img alt="Syrtis Major Hemisphere Enhanced thumbnail" class="thumb" src="images/55a0a1e2796313fdeafb17c35925e8ac_syrtis_major_enhanced.tif_thumb.png"/></a>,
 <a class="itemLink product-item" href="syrtis.html">
 <h3>Syrtis Major Hemisphere Enhanced</h3>
 </a>,
 <a class="itemLink product-item" href="valles.html"><

In [177]:
browser.find_by_tag('div.item').click()

In [187]:
titles = img_soup.find('h3').get_text()
titles

'Cerberus Hemisphere Enhanced'

In [179]:
title = img_soup.find('h3')
title

<h3>Cerberus Hemisphere Enhanced</h3>

In [201]:
for result in results:
    hemispheres = {}
    try:
        title = result.find('h3').text
        url = result.a['href']
        
        if (title and url):
            print(title)
            print(url)
    except AttributeError as e:
        print(e)

Cerberus Hemisphere Enhanced
cerberus.html
Schiaparelli Hemisphere Enhanced
schiaparelli.html
Syrtis Major Hemisphere Enhanced
syrtis.html
Valles Marineris Hemisphere Enhanced
valles.html


In [None]:
for i in range(4):
    html = browser.html
    img_soup = soup(html, 'html.parser')
    results = img_soup.find_all('a', class_='itemLink product-item')
    
    for result in results:
        title = result.find('h3').text
        

In [552]:
# 3. Write code to retrieve the image urls and titles for each hemisphere image.
for i in range(4):
    #create empty dictionary
    hemispheres = {}
    
    # Click on each hemisphere link
    browser.find_by_css('a.product-item h3')[i].click()
    
    # Navigate to the full-resolution image page
    element = browser.links.find_by_text('Sample').first
    
    # Retrieve the full-resolution image URL string and title for the hemisphere image
    img_url = element['href']
    title = browser.find_by_css("h2.title").text
    
    # Navigate back to the beginning to get the next hemisphere image
    browser.back()
    
    # Save the full-resolution image URL string
    hemispheres["img_url"] = img_url
    
    # Save the hemisphere image title
    hemispheres["title"] = title
    
    # Add img_url and title to hemispheres dictionary
    hemisphere_image_urls.append(hemispheres)
    

In [553]:
# 4. Print the list that holds the dictionary of each image url and title.
hemisphere_image_urls

[{'img_url': 'https://marshemispheres.com/images/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]

In [544]:
title = img_soup.find('h3').get_text()
title
for i in range(len(img_soup)):
    if title not in hemispheres:
        print(title)
    else:
        print('in dict')

Cerberus Hemisphere Enhanced


In [547]:
img_url = img_soup.select('a[href]')
img_url

[<a href="#" style="float:right;margin-top:10px;" target="_blank">
 <img alt="USGS: Science for a Changing World" class="logo" height="60" src="images/usgs_logo_main_2x.png"/>
 </a>,
 <a href="#" style="float:right;margin-top:5px;margin-right:20px;" target="_blank">
 <img alt="NASA" class="logo" height="65" src="images/nasa-logo-web-med.png"/>
 </a>,
 <a href="https://astrogeology.usgs.gov/search" style="float:right;text-decoration:none;">
 <img alt="Astropedia" src="images/astropedia-logo-main.png" style="width:200px;border:none;float:right;"/>
 <div style="clear:both;font-size:.8em;float:right;color:#888;">Lunar and Planetary Cartographic Catalog</div>
 </a>,
 <a class="itemLink product-item" href="cerberus.html"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png"/></a>,
 <a class="itemLink product-item" href="cerberus.html">
 <h3>Cerberus Hemisphere Enhanced</h3>
 </a>,
 <a class="itemLink prod

In [None]:
# Use the parent element to find the first 'a' tag 
news_title = slide_elem.find('div', class_='content_title').get_text()
news_title

In [4]:
img_soup.body

NameError: name 'img_soup' is not defined

In [469]:
# 2. Create a list to hold the images and titles.
hemisphere_image_urls = []

In [459]:
for i in range (4):
    images = browser.find_by_tag('h3')
    images[i].click()
    partial_url = img_soup.find("img", class_="wide-image")["src"]
    image_title = img_soup.find("h2",class_="title").text
    image_url = 'https://astrogeology.usgs.gov'+ partial_url
    image_dict = {"title":image_title,"image_url":image_url}
    hemisphere_image_urls.append(image_dict)
    browser.back()
    
    #full_img = browser.find_by_css('a.product-item img')
    #full_img.click()

TypeError: 'NoneType' object is not subscriptable

In [460]:
full_img = browser.find_by_css('a.product-item img')

In [461]:
img_url = img_soup.find('img', class_='wide-image').get('src')
img_url

AttributeError: 'NoneType' object has no attribute 'get'

In [428]:
hemisphere = {}

In [435]:
for i in range (len(full_img)):
    # Find and click the full image button
    full_img_elem = browser.find_by_tag('h3')[i]
    full_img_elem.click()
    img = img_soup.find('a', text='Sample')
    full_img = f'{url}{img_url}'
    
    
    #full_image_elem.click()
    # Find the sample image anchor tag and extract the href
    #img_url = img_soup.find('img', class_='wide-image').get('src')
    #full_img_url = f'https://marshemispheres.com/{img_url}'
   


In [436]:
full_img

<splinter.element_list.ElementList at 0x7ff18c8c20d0>

In [None]:
hemisphere = {}

In [None]:
for i in range(len(urls)):
    full_img_url = browser.find_by_tag('h3')[i]
    full_img_url.click()
    
    # Get image url
    img = img_soup.find('a', text='Sample').attrs['href']
    full_img = f'{url}{img}'
    
    # Get hemisphere title
    title = img_soup.find('h2', class_='title').get_text()
    
    hemisphere[f'title'] = title
    hemisphere[f'img_url'] = full_img
    
    hemisphere_image_urls.append(hemisphere)
    
    # Go back
    browser.back()

[{'img_url': 'https://marshemispheres.com/images/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]

In [255]:
for img in img_soup.find_all('a'):
    print(link.get('href'))

#
#
https://astrogeology.usgs.gov/search
cerberus.html
cerberus.html
schiaparelli.html
schiaparelli.html
syrtis.html
syrtis.html
valles.html
valles.html
#
#
#
#
#


In [1]:
# Find the relative image url
img_url_rel = img_soup.find('a').get('src')
img_url_rel

NameError: name 'img_soup' is not defined

In [217]:
# 3. Write code to retrieve the image urls and titles for each hemisphere
for i in range(4):
    # Loop through links, click the link, find the sample a, return href
    hemispheres = {}
    # Get list of hemispheres
    
    

ElementDoesNotExist: no elements could be found with tag_name "h3"

In [199]:




for i in img_urls:
    # Save the full-resolution image URL string as the value for the img_url key that will be stored in the dictionary you created from the Hint
    full_image_url = browser.find_by_tag('h3')[i].click()
    #full_image_url.click()
    # Find the Sample image anchor tag & get the href
    img = img_soup.find('a', text='Sample').get('src')
    full_img = f'{url}{img}'
    
    # Save the hemisphere image title as the value for the title key that will be stored in the dictionary you created from the Hint.
    #Get hemisphere title
    title = img_soup.find_all('h2', class_='title')
    
    # Append hemisphere object to a list
    hemisphere[f'title'] = title
    hemisphere[f'img_url'] = full_img
    hemisphere_image_urls.append(hemisphere)
    
img_urls

TypeError: Object of type Tag is not JSON serializable

[{'img_url': 'https://marshemispheres.com/<a href="images/syrtis_major_enhanced-full.jpg" target="_blank">Sample</a>'},
 {'title': [<h2 class="title">Syrtis Major Hemisphere Enhanced</h2>],
  'img_url': 'https://marshemispheres.com/<a href="images/syrtis_major_enhanced-full.jpg" target="_blank">Sample</a>'}]

In [None]:
#full_image = [a['href'] for a in img_soup.find_all('a', href=True)]
#full_image

In [166]:
for a in img_soup.find_all('a', href=True):
    print("Found the URL:", a['href'])



Found the URL: #
Found the URL: #
Found the URL: https://astrogeology.usgs.gov/search
Found the URL: cerberus.html
Found the URL: cerberus.html
Found the URL: schiaparelli.html
Found the URL: schiaparelli.html
Found the URL: syrtis.html
Found the URL: syrtis.html
Found the URL: valles.html
Found the URL: valles.html
Found the URL: #
Found the URL: #
Found the URL: #
Found the URL: #
Found the URL: #


In [2]:
 # Parse the resulting html with soup
html = browser.html
#img_soup = soup(html, 'html.parser')
# Find the relative image url
img_url = browser.find_by_tag('img')[1]
img_url.click()

NameError: name 'browser' is not defined

In [115]:
for hem in img_url:
    hemispheres = {}
    # Find the image url
    img_url = img_soup.find_all('a', 'href')
    print(img_url)
    hemisphere_image_urls.append(hemispheres)

[{'img_url': 'https://marshemispheres.com/<a href="images/syrtis_major_enhanced-full.jpg" target="_blank">Sample</a>'},
 {'title': [<h2 class="title">Syrtis Major Hemisphere Enhanced</h2>],
  'img_url': 'https://marshemispheres.com/<a href="images/syrtis_major_enhanced-full.jpg" target="_blank">Sample</a>'}]

In [3]:
#for i in range(4):
#hemispheres = {}
# Parse the resulting html with soup
html = browser.html
img_soup = soup(html, 'html.parser')
# Find the relative image url
img_url = browser.find_by_tag('a').get('href')
img_url.click()
# Find and click the full image button
#full_image_elem = browser.find_by_tag('button')[1]
#full_image_elem.click()

NameError: name 'browser' is not defined

In [None]:

# Find the relative image url
img_url_rel = img_soup.find('img', class_='fancybox-image').get('src')
img_url_rel
# Use the base URL to create an absolute URL (add base URL)
img_url = f'https://spaceimages-mars.com/{img_url_rel}'
img_url

# Iterate through page 1-4
for x in range (1, 5):
    # Create an HTML object
    html = browser.html
    # Parse the HTML object using Beautiful Soup
    quote_soup = soup(html, 'html.parser')
    quotes = quote_soup.find_all('span', class_='text')
    # Print each quote parsed by BeautifulSoup
    for quote in quotes:
        print('page:', x, '----------')
        print(quote.text)
    #Click the "Next" button using Splinter
    browser.links.find_by_partial_text('Next').click()

In [None]:
# 5. Quit the browser
browser.quit()