In [16]:
import requests
import bs4
from bs4 import BeautifulSoup
import pandas as pd

In [17]:
# Request headers shared by every requests.get call in this notebook.
# The value is a desktop Chrome User-Agent string
# (presumably so the site serves the regular HTML pages -- TODO confirm).
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"
}

# One concrete search-results URL used only for the connectivity check below.
link = "https://in.indeed.com/jobs?q=Data+Scientist&l=Bangalore%2C+Karnataka&start=10"

# Send the HTTP request. requests never times out unless told to, so an
# explicit timeout keeps a stalled connection from hanging the cell forever.
r = requests.get(link, headers=headers, timeout=30)

# Last expression of the cell: displays the HTTP status code of the response.
r.status_code

200

In [18]:
# Search-results URL template. get_href fills the three {} slots, in order,
# with the position, the city and the result offset (start).
url_temp = "https://in.indeed.com/jobs?q={}&l={}&start={}"

# Site root that get_href combines with each href found on a results page.
base_link = "https://in.indeed.com"

def get_href(url_temp, position, city, results_per_city=200):
    """Collect the href of every <a> tag on the search-result pages.

    The URL template is formatted with ``position``, ``city`` and a ``start``
    offset that advances in steps of 10 until ``results_per_city`` results
    have been covered. Each page is downloaded and parsed, and the ``href``
    of every anchor that has one is resolved against ``base_link``.

    Parameters
    ----------
    url_temp : str
        Template with three ``{}`` slots: position, city, start offset.
    position : str
        Search query, inserted into the URL as given (no extra encoding).
    city : str
        Location, inserted into the URL as given (no extra encoding).
    results_per_city : int, default 200
        Number of search results to walk through, 10 per page.

    Returns
    -------
    list of str
        One URL per anchor found, in page order; duplicates are kept.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated from ``requests.get`` (connection failure, timeout, ...).
    """
    # Function-scope import so this cell does not depend on the import cell
    # being edited.
    from urllib.parse import urljoin

    page_size = 10
    href_list = []

    # Offsets 0, 10, ..., results_per_city - 10. Stopping *before*
    # results_per_city avoids fetching one extra page beyond the requested
    # number of results.
    for start in range(0, results_per_city, page_size):
        url = url_temp.format(position, city, start)
        # Explicit timeout: requests would otherwise wait indefinitely.
        response = requests.get(url, headers=headers, timeout=30)
        soup = BeautifulSoup(response.text, "html.parser")

        for anchor in soup.find_all('a'):
            if anchor.has_attr("href"):
                # urljoin leaves absolute hrefs intact and resolves relative
                # ones against the site root; plain concatenation would
                # corrupt absolute links.
                href_list.append(urljoin(base_link, anchor['href']))

    return href_list

In [19]:
def get_job_links(href_list):
    """Keep only the URLs that look like job postings.

    A URL is kept when it contains the substring ``/rc/clk`` or the
    substring ``/company/``. Input order is preserved and a URL that
    contains both substrings is still returned only once per occurrence
    in ``href_list``.

    Parameters
    ----------
    href_list : list of str
        URLs to filter (e.g. the list returned by ``get_href``).

    Returns
    -------
    list of str
        The matching URLs, in their original order.
    """
    return [
        href
        for href in href_list
        if '/rc/clk' in href or '/company/' in href
    ]

In [20]:
def get_job_df(job_links, city):
    """Download each job posting and collect its details in a DataFrame.

    For every URL in ``job_links`` the page is requested and parsed with
    ``html.parser``; the title, company name and job description are then
    read from the elements carrying the CSS classes used below. A posting
    is skipped when any of those three elements is missing.

    Parameters
    ----------
    job_links : list of str
        URLs of individual job postings.
    city : str
        Stored as ``job_location`` for every row. The location is the
        searched city, not text scraped from the posting.

    Returns
    -------
    pandas.DataFrame
        Columns ``job_location``, ``job_title``, ``company`` and
        ``job_description``; one row per successfully parsed posting,
        indexed 0..n-1. Empty (but with the same columns) when nothing
        could be parsed.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated from ``requests.get`` (connection failure, timeout, ...).
    """
    columns = ["job_location", "job_title", "company", "job_description"]

    # Rows are accumulated in a plain list and turned into a frame once:
    # DataFrame.append no longer exists in pandas 2.x and copied the whole
    # frame on every call in earlier versions.
    records = []

    for job_url in job_links:
        # Explicit timeout: requests would otherwise wait indefinitely.
        response = requests.get(job_url, headers=headers, timeout=30)
        page = BeautifulSoup(response.text, "html.parser")

        title_tag = page.find('h1', {'class': 'icl-u-xs-mb--xs icl-u-xs-mt--none jobsearch-JobInfoHeader-title'})
        company_tag = page.find('div', {'class': 'icl-u-lg-mr--sm icl-u-xs-mr--xs'})
        desc_tag = page.find('div', {'class': 'jobsearch-jobDescriptionText'})

        # find() returns None for a missing element. Checking for that
        # explicitly skips incomplete postings without a bare ``except``
        # that would also hide KeyboardInterrupt and real errors.
        if title_tag is None or company_tag is None or desc_tag is None:
            continue

        records.append({
            "job_location": city,
            "job_title": title_tag.text,
            "company": company_tag.text,
            "job_description": desc_tag.text,
        })

    # Passing ``columns`` keeps the column order fixed and yields the
    # expected columns even when ``records`` is empty.
    return pd.DataFrame(records, columns=columns)

In [21]:
def get_job_postings(url_temp, position, city):
    """Run the full scraping pipeline for one position/city pair.

    Chains the three helpers: ``get_href`` gathers every link from the
    search-result pages, ``get_job_links`` keeps the job-posting URLs, and
    ``get_job_df`` downloads those postings into a DataFrame.

    Parameters
    ----------
    url_temp : str
        Search-results URL template passed through to ``get_href``.
    position : str
        Designation to search for.
    city : str
        City to search in; also passed to ``get_job_df``.

    Returns
    -------
    The value returned by ``get_job_df`` for the filtered links.
    """
    all_hrefs = get_href(url_temp, position, city)
    posting_urls = get_job_links(all_hrefs)
    return get_job_df(posting_urls, city)

In [24]:
# Scrape Data Scientist postings for Bengaluru. The position is passed with
# '+' in place of the space because it is inserted into the URL verbatim.
data_scientist_df = get_job_postings(
    url_temp,
    position='Data+Scientist',
    city='Bengaluru',
)

In [25]:
# Job postings for Data Scientist in Bengaluru, India: show the first 10 rows.
data_scientist_df.head(10)

Unnamed: 0,job_location,job_title,company,job_description
0,Bengaluru,Data Scientist,"Reach52, Inc.",Overview of reach52 and our workreach52 is a f...
1,Bengaluru,Data Scientist,Myian pharma,Mylan is a global pharmaceutical company commi...
2,Bengaluru,Data Scientist,Ambee,We are building a globally scalable solution t...
3,Bengaluru,Data Scientist,BizViz Technologies Pvt Ltd,"Design and execute statistical analysis, model..."
4,Bengaluru,Data Scientist,P2FT Infobitz,SkillsData Scientist - Deep Learning (Camera) ...
5,Bengaluru,"Data Scientist - Machine Learning, Deep Learni...",Enterprise Bot,"At Enterprise Bot, we create AI-powered bots t..."
6,Bengaluru,Data Scientist,HP,Applies basic foundation of a function's princ...
7,Bengaluru,Data Scientist-CAI,HDFC Bank,Job Description\nScorecard Building and Modell...
8,Bengaluru,Data & Applied Scientist,Microsoft,What if your job description were simply “make...
9,Bengaluru,Data Scientist,CoStrategix,Job Description – Data Scientist\n\n\n3-5 year...


In [29]:
# Job postings for Data Analyst in Hyderabad, India.
data_analyst_hyd = get_job_postings(
    url_temp,
    position='Data+Analyst',
    city='Hyderabad',
)

# Show the first 10 rows as the cell output.
data_analyst_hyd.head(10)

Unnamed: 0,job_location,job_title,company,job_description
0,Hyderabad,Data Quality Assurance Analyst - Manual Testing,Vamstar,Are you are interested in learning about healt...
1,Hyderabad,Data Analyst,Referenceglobe,Education Research Data Analyst About usRefere...
2,Hyderabad,Junior Data Analyst,SUPPLIER.IO,Job Description –* Junior Data Analyst / Suppo...
3,Hyderabad,Tableau Data Visualization Analyst,Milestone Technologies,"Company Overview:\n\nMilestone Technologies, I..."
4,Hyderabad,Data Analyst,IBI Group,Company Description\nWe are a global team of d...
5,Hyderabad,Research Analyst,LeanIX GmbH,"LeanIX, The Continuous Transformation Platform..."
6,Hyderabad,Junior Data Analyst,Qurium Solutions Inc,Job Description – Junior Data Analyst / Suppor...
7,Hyderabad,Data Analyst,Helius Technologies,Introduction:\nWe are looking for a high quali...
8,Hyderabad,Business Analyst,IODEVOPS SERVICES Pvt. Ltd,Domain Aspects: Minimum 5 years of working kno...
9,Hyderabad,Associate Business Analyst/Data Analytics/COO IT,HSBC,The health and safety of our employees and can...
