In [1]:
import requests
import bs4
from bs4 import BeautifulSoup
import pandas as pd

In [4]:
class WebCrawler(object):

    def __init__(self, title = '',location = ""):

        self._url = "https://www.indeed.co.in/jobs"
        self._headers = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36"}
        self._title = title
        self._location = location
        self.params = {
            'q': 'Python',
            'l': 'Mumbai, Maharashtra'}

    def get(self):

        try:

            r = requests.get(url=self._url,
                              headers=self._headers ,
                              params=self.params)
            return r.text

        except Exception as e:

            print("Failed to make response to Indeed")



class DataStructure():

    def __init__(self):
        self.data = {
            'title':[],
            'location':[],
            'summary':[],
            'date':[],
            'link':[]
        }




class DataCleaning(object):

    def __init__(self, title = '', location = ""):
        self._title = title
        self._location = location
        self._webcrawler = WebCrawler(self._title, self._location)
        self.data = self._webcrawler.get()
        self.datastructure = DataStructure()

    def getData(self):
        soup = BeautifulSoup(self.data, 'html.parser')

        for x in soup.findAll('div', class_="jobsearch-SerpJobCard unifiedRow row result"):

            title = x.find(class_="title").text.strip()
            self.datastructure.data["title"].append(title)


            location = x.find(class_="location accessible-contrast-color-location").text.strip()
            self.datastructure.data["location"].append(location)


            summary = x.find(class_="summary")
            self.datastructure.data["summary"].append(summary.text)

            date = x.find(class_="date")
            self.datastructure.data["date"].append(date)


            link = x.find('a', href=True)
            base_url = "https://www.indeed.com/"


            Final = base_url + link["href"]

            self.datastructure.data["link"].append(Final)


        data = list(zip(
            self.datastructure.data["title"],  self.datastructure.data["location"],
            self.datastructure.data["summary"],self.datastructure.data["date"],
            self.datastructure.data["link"]
        ))

        df = pd.DataFrame(data=data, columns=["title", "location", "summary", "date", "link" ])
        return df




class IndeedJobSearch(object):

    def __init__(self, title = '', location = ""):

        self.title = title
        self.location = location
        self.datacleaning = DataCleaning(title=self.title, location=self.location)

    def getJobs(self):
        data = self.datacleaning.getData()
        return data

    def saveCsv(self):

        data = self.datacleaning.getData()
        data.to_csv("Jobs.csv")

    def saveExcel(self):

        data = self.datacleaning.getData()
        data.to_excel("job.xls")



if __name__ == "__main__":

    jobsearch = IndeedJobSearch(title='Python', location="San Francisco , CA")
    data  = jobsearch.getJobs()
    print(data)
    # jobsearch.saveExcel()
    #jobsearch.saveCsv()


                                                title  \
0      Online Python Trainer (Female candidates only)   
1                                      Data Scientist   
2                       Assistant Manager - Analytics   
3                                    Python Developer   
4                                           Associate   
5   Python Development part time job/internship at...   
6   Hiring for Python Developer - Craftsvilla Hand...   
7                                   Python & Embedded   
8                       Programmer Associate - Python   
9                                    Python Developer   
10                                  Python Developers   
11  Looking for Full stack Python Developer ! Fina...   
12     Senior Manager - Computer Science (Curriculum)   
13                                Analyst - Analytics   

                      location  \
0          Mumbai, Maharashtra   
1          Mumbai, Maharashtra   
2          Mumbai, Maharashtra   
3       

In [10]:
data

Unnamed: 0,title,location,summary,date,link
0,Online Python Trainer (Female candidates only),"Mumbai, Maharashtra",\n\nWhiteHat Jr* was created to solve the #1 g...,[30+ days ago],https://www.indeed.com//cmp/Whitehat-Education...
1,Data Scientist,"Mumbai, Maharashtra",\n\nThis project requires a data scientist to ...,[30+ days ago],https://www.indeed.com//pagead/clk?mo=r&ad=-6N...
2,Assistant Manager - Analytics,"Mumbai, Maharashtra",\n\nRelocation Assistance Offered Within Count...,[30+ days ago],https://www.indeed.com//pagead/clk?mo=r&ad=-6N...
3,Python Developer,"Mumbai, Maharashtra",\n\nExpert python developer with exposure to D...,[Today],https://www.indeed.com//company/Corpusvision-T...
4,Associate,"Mumbai, Maharashtra",\n\nThis is a senior production support role r...,[1 day ago],https://www.indeed.com//rc/clk?jk=ef789b1878c0...
5,Python Development part time job/internship at...,"Mumbai, Maharashtra",\n\nMerilent Interactive is an independent sof...,[7 days ago],https://www.indeed.com//rc/clk?jk=887e6b10d099...
6,Hiring for Python Developer - Craftsvilla Hand...,"Mumbai, Maharashtra","\n\nWrite “clean”, well-designed back-end code...",[10 days ago],https://www.indeed.com//company/Craftsvilla-Ha...
7,Python & Embedded,"Mahim, Mumbai, Maharashtra","\n\nProgramming languages - Java, c, c++, pyth...",[30+ days ago],https://www.indeed.com//company/Manshu-Comtel-...
8,Programmer Associate - Python,"Mumbai, Maharashtra","\n\nDesign, build and maintain efficient, reus...",[10 days ago],https://www.indeed.com//rc/clk?jk=ba6592664900...
9,Python Developer,"Mumbai, Maharashtra",\n\nLTI (NSE: LTI) is a global technology cons...,[8 days ago],https://www.indeed.com//rc/clk?jk=4bc30a68b86c...
