In [1]:
# Web Scraping For Job Portal
import csv
from datetime import datetime
import requests
from bs4 import BeautifulSoup

In [2]:
def get_url(position,location):
    position = position.replace(' ','+')
    location = location.replace(' ','+')
    template = 'https://in.indeed.com/jobs?q={}&l={}'
    url = template.format(position,location)
    return url

In [3]:
url = get_url('Python Developer','Noida,Uttar Pradesh')
print(url)

https://in.indeed.com/jobs?q=Python+Developer&l=Noida,Uttar+Pradesh


In [4]:
response = requests.get(url)

In [5]:
response

<Response [200]>

In [6]:
# response.text

In [7]:
soup = BeautifulSoup(response.text,'html.parser')

In [8]:
# Here we are searching for all table tags having jobCard_mainContent class name 
cards = soup.find_all('div', 'jobsearch-SerpJobCard')
print(len(cards))

15


In [9]:
print(cards[1])

<div class="jobsearch-SerpJobCard unifiedRow row result" data-jk="4022bff9b16d6005" data-tn-component="organicJob" id="p_4022bff9b16d6005">
<h2 class="title">
<a class="jobtitle turnstileLink" data-tn-element="jobTitle" href="/company/Fluper-Limited/jobs/Python-Django-Developer-4022bff9b16d6005?fccid=a8a863bdb921baff&amp;vjs=3" id="jl_4022bff9b16d6005" onclick="setRefineByCookie([]); return rclk(this,jobmap[1],true,1);" onmousedown="return rclk(this,jobmap[1],false,1);" rel="noopener nofollow" target="_blank" title="Python/Django Developer">
<b>Python</b>/Django <b>Developer</b></a>
<span class="new">new</span></h2>
<div class="sjcl">
<div>
<span class="company">
Fluper Limited</span>
</div>
<div class="recJobLoc" data-rc-loc="Noida, Uttar Pradesh" id="recJobLoc_4022bff9b16d6005" style="display: none"></div>
<span class="location accessible-contrast-color-location">Noida, Uttar Pradesh</span>
<span class="remote-bullet">•</span>
<span class="remote">Temporarily remote</span>
</div>
<di

In [10]:
# let's first prototype the model with a single record

card = cards[1]

In [11]:
atag = card.a

job_url = 'https://in.indeed.com'+atag.get('href')
job_url

'https://in.indeed.com/company/Fluper-Limited/jobs/Python-Django-Developer-4022bff9b16d6005?fccid=a8a863bdb921baff&vjs=3'

In [12]:
job_title = atag.get('title')
job_title

'Python/Django Developer'

In [13]:
# company_name
company = card.find('span','company').text.strip()
company

'Fluper Limited'

In [14]:
# job location
job_location = card.find('div', 'recJobLoc').get('data-rc-loc')
job_location

'Noida, Uttar Pradesh'

In [15]:
# job_summary

summary = card.find('div', 'summary').text.strip().replace('\n', ' ')
summary

'Develop back-end components to improve responsiveness and overall performance. Integrate user-facing elements into applications. Improve functionality of existing systems. Implement security and data protection solutions. Assess and prioritize feature requests. Work experience as a Python Developer.'

In [16]:
# job date
post_date = card.find('span','date').text
today = datetime.today().strftime('%Y-%m-%d')

post_date

'7 days ago'

In [17]:
job_salary = card.find('span', 'salaryText')
if job_salary:
    salary = job_salary.text.strip()
else:
    salary = ''

In [18]:
salary

'₹2,00,000 - ₹6,00,000 a year'

In [19]:
def get_record(card):
    """Extract Job Details From CARDS"""
    
    job_title = atag.get('title')
    job_url = 'https://in.indeed.com'+atag.get('href')
    company = card.find('span','company').text.strip()
    job_location = card.find('div', 'recJobLoc').get('data-rc-loc')
    summary = card.find('div', 'summary').text.strip().replace('\n', ' ')
    post_date = card.find('span','date').text
    today = datetime.today().strftime('%Y-%m-%d')
    try:
        job_salary = card.find('span', 'salaryText')
        if job_salary:
            salary = job_salary.text.strip()
        else:
            salary = ''
    except AttributeError:
        salary = ''
    
        
    record = (job_title, job_url, company , job_location , summary , post_date , today,salary)
    return record

In [20]:
records = []

for everyCard in cards:
    jobDetails = get_record(everyCard)
    records.append(jobDetails)

In [21]:
len(records)

15

In [22]:
records[0]

('Python/Django Developer',
 'https://in.indeed.com/company/Fluper-Limited/jobs/Python-Django-Developer-4022bff9b16d6005?fccid=a8a863bdb921baff&vjs=3',
 'Uni Global Tech',
 'Noida, Uttar Pradesh',
 'Good understanding of Object Oriented analysis and design. Strong Python Scripting Experience. Write Reusable, Testable, and Efficient Code. Implementation of security and data protection. Job Types: Full-time, Internship, Fresher. Salary: ₹8,000.00 - ₹15,000.00 per month.',
 '1 day ago',
 '2021-06-30',
 '₹8,000 - ₹15,000 a month')

In [23]:
records[1]

('Python/Django Developer',
 'https://in.indeed.com/company/Fluper-Limited/jobs/Python-Django-Developer-4022bff9b16d6005?fccid=a8a863bdb921baff&vjs=3',
 'Fluper Limited',
 'Noida, Uttar Pradesh',
 'Develop back-end components to improve responsiveness and overall performance. Integrate user-facing elements into applications. Improve functionality of existing systems. Implement security and data protection solutions. Assess and prioritize feature requests. Work experience as a Python Developer.',
 '7 days ago',
 '2021-06-30',
 '₹2,00,000 - ₹6,00,000 a year')

In [24]:
records[2]

('Python/Django Developer',
 'https://in.indeed.com/company/Fluper-Limited/jobs/Python-Django-Developer-4022bff9b16d6005?fccid=a8a863bdb921baff&vjs=3',
 'Shridhar life school',
 'Noida, Uttar Pradesh',
 'Urgent hiring for back end developer*. Should have 2- 6 years of experience in Python, Django, MySql and Mango DB. Knowledge of react java script. Knowledge of DJango rest framework. Identifying enhancement and modification that improve product quality. Basic understanding of front end development.',
 'Just posted',
 '2021-06-30',
 '₹35,000 - ₹50,000 a month')

In [25]:
records[3]

('Python/Django Developer',
 'https://in.indeed.com/company/Fluper-Limited/jobs/Python-Django-Developer-4022bff9b16d6005?fccid=a8a863bdb921baff&vjs=3',
 'Techamic Solutions',
 'Noida, Uttar Pradesh',
 'Should have a good understanding of OOPS. Should have a good hands on developing applications in Python and DJango. Should be able to work in a team. Good understanding of Data Models. Should be good at writing efficient queries. Should be able to break down the high-level requirements into…',
 '9 days ago',
 '2021-06-30',
 '₹4,00,000 - ₹8,00,000 a year')

In [53]:
# Getting The Next Page
while True:
    try:
        url = 'https://in.indeed.com/'+soup.find('a',{'aria-label':'Next'}).get('href')
    except AttributeError:
        break
    
    
    response = requests.get(url)
    soup = BeautifulSoup(response.text,'html.parser')
    cards = soup.find_all('div', 'jobsearch-SerpJobCard')
    
    for everyCard in cards:
        jobDetails = get_record(everyCard)
        records.append(jobDetails)

In [54]:
len(records)

121

In [28]:
# Putting It All Together

import csv
from datetime import datetime
import requests
from bs4 import BeautifulSoup

def get_url(position,location):
    position = position.replace(' ','+')
    location = location.replace(' ','+')
    template = 'https://in.indeed.com/jobs?q={}&l={}'
    url = template.format(position,location)
    return url


def get_record(card):
    """Extract Job Details From CARDS"""
    
    job_title = atag.get('title')
    job_url = 'https://in.indeed.com'+atag.get('href')
    company = card.find('span','company').text.strip()
    job_location = card.find('div', 'recJobLoc').get('data-rc-loc')
    summary = card.find('div', 'summary').text.strip().replace('\n', ' ')
    post_date = card.find('span','date').text
    today = datetime.today().strftime('%Y-%m-%d')
    try:
        job_salary = card.find('span', 'salaryText')
        if job_salary:
            salary = job_salary.text.strip()
        else:
            salary = ''
    except AttributeError:
        salary = ''
    
        
    record = (job_title, job_url, company , job_location , summary , post_date , today,salary)
    return record


def main(position,location):
    records = []
    url = get_url(position,location)
    
    while True:
        response = requests.get(url)
        soup = BeautifulSoup(response.text,'html.parser')
        cards = soup.find_all('div', 'jobsearch-SerpJobCard')
        
        for everyCard in cards:
            jobDetails = get_record(everyCard)
            records.append(jobDetails)
            
        try:
            url = 'https://in.indeed.com/'+soup.find('a',{'aria-label':'Next'}).get('href')
        except AttributeError:
            break
            
        
        # Save the job information in a csv file
        
        with open('jobResultsNew.csv','w',newline='',encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(['JobTitle','JobUrl','Company','Location','Summary','PostDate','ExtractDate','Salary'])
            writer.writerows(records)
            

In [29]:
#main('Python Developer','Noida,Uttar Pradesh')

In [30]:
#main('Java','Noida,Uttar Pradesh')