In [1]:
#to get the site’s HTML code into our Python script so that you can interact with it. 
#For this task, we shall use Python’s requests library.
!pip install requests



In [2]:
# Perform an HTTP GET request to the given URL.
# The HTML that the server sends back is stored in a Python Response object.
import requests

URL = 'https://www.monster.com/jobs/search/?q=Software-Developer&where=Australia'
# Without a timeout, requests waits indefinitely if the server never answers,
# which would leave this cell hanging.
page = requests.get(URL, timeout=10)
# Printing the Response shows its HTTP status code, e.g. <Response [200]>.
print(page)

In [9]:
#parse this lengthy code response with Beautiful Soup to make it more accessible and pick out the data that 
#you’re interested in.

#Beautiful Soup is a Python library for parsing structured data. 
#It allows you to interact with HTML in a similar way to how you would interact with a web page using developer tools. 
#Beautiful Soup exposes a couple of intuitive functions you can use to explore the HTML you received.
#
To get started, use your terminal to install the Beautiful Soup library
!pip3 install beautifulsoup4



In [33]:
# Download the search-results page and wrap its HTML in a Beautiful Soup object
# so that it can be searched like a document tree.
# The second argument to BeautifulSoup names the parser to use; 'html.parser'
# is the HTML parser from Python's standard library.
import requests
from bs4 import BeautifulSoup

URL = 'https://www.monster.com/jobs/search/?q=Software-Developer&where=Australia'
# Without a timeout, requests waits indefinitely if the server never answers.
page = requests.get(URL, timeout=10)
# Stop here on an HTTP error status (4xx/5xx) instead of parsing an error page.
page.raise_for_status()

soup = BeautifulSoup(page.content, 'html.parser')

In [34]:
## Find elements by ID
# In an HTML web page, every element can have an id attribute assigned.
# As the name suggests, that id attribute makes the element uniquely identifiable on the page.
# You can begin to parse your page by selecting a specific element by its ID.
results = soup.find(id='ResultsContainer')

# soup.find() returns None when nothing matches (for example, if the site served
# a different page or changed its markup). Stop with a clear message instead of
# failing with "'NoneType' object has no attribute 'prettify'".
if results is None:
    raise RuntimeError("No element with id='ResultsContainer' was found in the downloaded page")

# For easier viewing, .prettify() renders any Beautiful Soup object as indented HTML.
print(results.prettify())

<div class="mux-custom-scroll" data-extend="left" data-mux="customScroll" data-target="html" id="ResultsContainer">
 <div class="scrollable" id="ResultsScrollable">
  <script type="application/ld+json">
   {"@context":"https://schema.org","@type":"ItemList","mainEntityOfPage":{
            "@type":"CollectionPage","@id":"https://www.monster.com/jobs/search/?q=Software-Developer&amp;where=Australia"
            }
            ,"itemListElement":[

                 {"@type":"ListItem","position":1,"url":"https://job-openings.monster.com/senior-lead-software-engineer-browser-sunnyvale-ca-plantation-fl-hq-austin-tx-culver-new-york-city-ca-seattle-wa-toronto-ny-us-magic-leap-inc/36b509cf-114c-48aa-aede-e6574b6cbff5"}
                    ,
                 {"@type":"ListItem","position":2,"url":""}
                    ,
                 {"@type":"ListItem","position":3,"url":"https://job-openings.monster.com/sql-bi-ssrs-ssis-developer-for-blackboard-nyc-new-york-wa-us-lancesoft-inc/5

In [13]:
# Collect every job listing on the page: .find_all() returns an iterable with
# the HTML of each <section> element whose CSS class is "card-content".
job_elems = results.find_all('section', attrs={'class': 'card-content'})

In [14]:
# Dump the raw HTML of every job card, leaving one blank line after each.
for job_elem in job_elems:
    print(job_elem)
    print()

<section class="card-content" data-jobid="36b509cf-114c-48aa-aede-e6574b6cbff5" onclick="MKImpressionTrackingMouseDownHijack(this, event)">
<div class="flex-row">
<div class="mux-company-logo thumbnail"></div>
<div class="summary">
<header class="card-header">
<h2 class="title"><a data-bypass="true" data-m_impr_a_placement_id="JSR2CW" data-m_impr_j_cid="660" data-m_impr_j_coc="" data-m_impr_j_jawsid="435655462" data-m_impr_j_jobid="2219341" data-m_impr_j_jpm="2" data-m_impr_j_jpt="3" data-m_impr_j_lat="0" data-m_impr_j_lid="0" data-m_impr_j_long="0" data-m_impr_j_occid="11970" data-m_impr_j_p="1" data-m_impr_j_postingid="36b509cf-114c-48aa-aede-e6574b6cbff5" data-m_impr_j_pvc="ec3a6188-6a80-441a-814d-a9e2c9b76318" data-m_impr_s_t="t" data-m_impr_uuid="2c9cbc3d-d0a9-4839-a00c-7289830040f4" href="https://job-openings.monster.com/senior-lead-software-engineer-browser-sunnyvale-ca-plantation-fl-hq-austin-tx-culver-new-york-city-ca-seattle-wa-toronto-ny-us-magic-leap-inc/36b509cf-114c-48aa-

In [15]:
# Show the title, company and location elements of each job card.
for job_elem in job_elems:
    # Every item in job_elems supports the same search methods used on `soup`
    # and `results` above, so each field is looked up with .find().
    title_elem, company_elem, location_elem = (
        job_elem.find(tag_name, class_=css_class)
        for tag_name, css_class in (
            ('h2', 'title'),
            ('div', 'company'),
            ('div', 'location'),
        )
    )
    # One element per line, followed by a blank separator line.
    print(title_elem, company_elem, location_elem, sep='\n', end='\n\n')

<h2 class="title"><a data-bypass="true" data-m_impr_a_placement_id="JSR2CW" data-m_impr_j_cid="660" data-m_impr_j_coc="" data-m_impr_j_jawsid="435655462" data-m_impr_j_jobid="2219341" data-m_impr_j_jpm="2" data-m_impr_j_jpt="3" data-m_impr_j_lat="0" data-m_impr_j_lid="0" data-m_impr_j_long="0" data-m_impr_j_occid="11970" data-m_impr_j_p="1" data-m_impr_j_postingid="36b509cf-114c-48aa-aede-e6574b6cbff5" data-m_impr_j_pvc="ec3a6188-6a80-441a-814d-a9e2c9b76318" data-m_impr_s_t="t" data-m_impr_uuid="2c9cbc3d-d0a9-4839-a00c-7289830040f4" href="https://job-openings.monster.com/senior-lead-software-engineer-browser-sunnyvale-ca-plantation-fl-hq-austin-tx-culver-new-york-city-ca-seattle-wa-toronto-ny-us-magic-leap-inc/36b509cf-114c-48aa-aede-e6574b6cbff5" onclick="clickJobTitle('plid=0&amp;pcid=660&amp;poccid=11970','Software Developer',''); clickJobTitleSiteCat('{&quot;events.event48&quot;:&quot;true&quot;,&quot;eVar25&quot;:&quot;Senior/Lead Software Engineer, Browser&quot;,&quot;eVar66&quot

In [None]:
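#Uncomment and run the following code snippet and you’ll see the text content displayed.
#However, you’ll also get a lot of whitespace. Since you’re now working with Python strings,
#you can .strip() the superfluous whitespace.
#You can also apply any other familiar Python string methods to further clean up your text.
#Note: the snippet is left commented out because it can fail with
#AttributeError: 'NoneType' object has no attribute 'text'; the next cell handles that case.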

# for job_elem in job_elems:
#     title_elem = job_elem.find('h2', class_='title')
#     company_elem = job_elem.find('div', class_='company')
#     location_elem = job_elem.find('div', class_='location')
#     print(title_elem.text)
#     print(company_elem.text)
#     print(location_elem.text)
#     print()

In [31]:
# In case of "AttributeError: 'NoneType' object has no attribute 'text'", use this code.
# Cards that lack a title, company or location element are skipped.
for job_elem in job_elems:
    title_elem = job_elem.find('h2', class_='title')
    company_elem = job_elem.find('div', class_='company')
    location_elem = job_elem.find('div', class_='location')
    # .find() yields None for a missing element; such a card cannot be printed.
    if title_elem is None or company_elem is None or location_elem is None:
        continue
    # Text of each field with surrounding whitespace removed, one per line,
    # followed by a blank separator line.
    print(
        title_elem.text.strip(),
        company_elem.text.strip(),
        location_elem.text.strip(),
        sep='\n',
        end='\n\n',
    )

Senior/Lead Software Engineer, Browser
Magic Leap, Inc.
Sunnyvale, CA; Plantation, FL (HQ); Austin, TX; Culver New York City, CA; Seattle, WA; Toronto, NY

SQL BI (SSRS, SSIS) developer for Blackboard - NYC
LanceSoft Inc
New york, WA

Analytics Developer
Conoco Phillips
Brisbane, QLD

Mid/Senior Software Engineer, SDK
Magic Leap, Inc.
Plantation, FL; Sunnyvale, CA; Austin, TX; Culver New York City, CA; Seattle, WA; Toronto, NY

Python Developer
LanceSoft Inc
Woodlands, WA

Customer Solutions Architect (Software) Professional Services Cyber Security
Varmour
Sydney, NSW

Software Platform Architect
Magic Leap, Inc.
Plantation, FL; Sunnyvale, CA; Culver New York City, CA; Austin, TX; Seattle, WA; Toronto, NY

Junior QA Analyst - Melbourne, Victoria
Mediaocean
Melbourne, VIC

Payroll Tester
Dialog Group
Brisbane, QLD

Growth Strategic Account Executive
Twilio
Sydney, New South Wales, NSW

Software Developer - Software Developer, Python, Golang
CyberCoders
San Francisco, CA

Enterprise Acco