# LinkedIn Web Scraper
### Goal:
- given: acquired skills, skills not yet acquired, job type
- output: jobs in order of best to worst match

In [4]:
import logging
from linkedin_jobs_scraper import LinkedinScraper
from linkedin_jobs_scraper.events import Events, EventData
from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters, TypeFilters, ExperienceLevelFilters, RemoteFilters

#### Constants

In [3]:
# Skills already acquired, as a comma-separated string.
ACQUIRED = "python, java, numpy, sql, machine learning, statistical analysis, tableau, r"
# Skills not yet acquired, as a comma-separated string; each skill is listed
# exactly once so that no skill is counted twice when the list is split.
NOT_ACQUIRED = "spark, hadoop, apache, html, css, javascript, react"
# Type of job being searched for.
JOB_TYPE = "internship"

## Scraping

In [5]:
# Configure the root logger at INFO level so INFO-and-above records are shown.
logging.basicConfig(level=logging.INFO)

In [6]:
def on_data(data: EventData):
    """Print a one-line summary of a scraped job posting.

    The line starts with the '[ON_DATA]' tag, followed by the posting's
    title, company, date, link and the length of its description, all
    separated by single spaces.
    """
    summary_fields = (
        data.title,
        data.company,
        data.date,
        data.link,
        len(data.description),  # only the size of the description, not its text
    )
    print('[ON_DATA]', *summary_fields)


def on_error(error):
    """Print the given error to stdout, prefixed with the '[ON_ERROR]' tag."""
    tag = '[ON_ERROR]'
    print(tag, error)


def on_end():
    """Print the '[ON_END]' marker to stdout."""
    marker = '[ON_END]'
    print(marker)

#### Defining the scraper

In [14]:
# Build the scraper. Other options that are available but not passed here:
#   chrome_executable_path=None  - custom Chrome executable path (e.g. /foo/bar/bin/chromedriver)
#   headless=True                - overrides headless mode only if chrome_options is None
scraper = LinkedinScraper(
    # Custom Chrome options would go here
    chrome_options=None,
    # How many threads will be spawned to run queries concurrently
    # (one Chrome driver for each thread)
    max_workers=1,
    # Slow down the scraper to avoid 'Too many requests (429)' errors
    slow_mo=1.3,
)

# Attach each callback to the event it handles
for _event, _callback in (
    (Events.DATA, on_data),
    (Events.ERROR, on_error),
    (Events.END, on_end),
):
    scraper.on(_event, _callback)

INFO:li:scraper:('Using strategy AnonymousStrategy',)


#### Defining the query

In [17]:
# Options for the first search: no keyword and no explicit location.
unfiltered_options = QueryOptions(
    optimize=True,  # Blocks requests for resources like images and stylesheet
    limit=27,  # Limit the number of jobs to scrape
)

# Filters for the second search: most recent first, posted within the last
# month, full-time or internship positions, no experience-level restriction.
data_filters = QueryFilters(
    relevance=RelevanceFilters.RECENT,
    time=TimeFilters.MONTH,
    type=[TypeFilters.FULL_TIME, TypeFilters.INTERNSHIP],
    experience=None,
)

# Options for the second search: United States only, at most 5 jobs.
data_options = QueryOptions(
    locations=['United States'],
    optimize=False,
    limit=5,
    filters=data_filters,
)

queries = [
    Query(options=unfiltered_options),
    Query(query='Data', options=data_options),
]

In [None]:
# Run every query in `queries`; results, errors and completion are reported
# through the callbacks registered on the scraper with scraper.on(...).
scraper.run(queries)

INFO:li:scraper:('Starting new query', "Query(query= options=QueryOptions(limit=27 locations=['Worldwide'] optimize=True))")
INFO:li:scraper:('[][Worldwide]', 'Opening https://www.linkedin.com/jobs/search?location=Worldwide')
INFO:li:scraper:('[][Worldwide]', 'Trying first selectors set')
INFO:li:scraper:('[][Worldwide]', 'Trying second selectors set')
INFO:li:scraper:('[][Worldwide]', 'OK')
INFO:li:scraper:('[][Worldwide]', 'Starting pagination loop')
INFO:li:scraper:('[][Worldwide]', 'Found 21 jobs')
ERROR:li:scraper:('[][Worldwide][1]', JavascriptException("javascript error: Cannot read properties of null (reading 'scrollIntoView')\n  (Session info: headless chrome=96.0.4664.110)", None, ['Backtrace:', '\tOrdinal0 [0x00446903+2517251]', '\tOrdinal0 [0x003DF8E1+2095329]', '\tOrdinal0 [0x002E2848+1058888]', '\tOrdinal0 [0x002E4F44+1068868]', '\tOrdinal0 [0x002E4E0E+1068558]', '\tOrdinal0 [0x002E56BA+1070778]', '\tOrdinal0 [0x003364F9+1402105]', '\tOrdinal0 [0x003264D3+1336531]', '\tOr

[ON_ERROR] Message: javascript error: Cannot read properties of null (reading 'scrollIntoView')
  (Session info: headless chrome=96.0.4664.110)
Stacktrace:
Backtrace:
	Ordinal0 [0x00446903+2517251]
	Ordinal0 [0x003DF8E1+2095329]
	Ordinal0 [0x002E2848+1058888]
	Ordinal0 [0x002E4F44+1068868]
	Ordinal0 [0x002E4E0E+1068558]
	Ordinal0 [0x002E56BA+1070778]
	Ordinal0 [0x003364F9+1402105]
	Ordinal0 [0x003264D3+1336531]
	Ordinal0 [0x00335BBF+1399743]
	Ordinal0 [0x0032639B+1336219]
	Ordinal0 [0x003027A7+1189799]
	Ordinal0 [0x00303609+1193481]
	GetHandleVerifier [0x005D5904+1577972]
	GetHandleVerifier [0x00680B97+2279047]
	GetHandleVerifier [0x004D6D09+534521]
	GetHandleVerifier [0x004D5DB9+530601]
	Ordinal0 [0x003E4FF9+2117625]
	Ordinal0 [0x003E98A8+2136232]
	Ordinal0 [0x003E99E2+2136546]
	Ordinal0 [0x003F3541+2176321]
	BaseThreadInitThunk [0x75DAFA29+25]
	RtlGetAppContainerNamedObjectPath [0x77C47A9E+286]
	RtlGetAppContainerNamedObjectPath [0x77C47A6E+238]

Traceback (most recent call last):
  

INFO:li:scraper:('[][Worldwide][1]', 'Processed')


[ON_DATA] Mergers and Acquisitions Analyst Pure Search 2022-01-08 https://www.linkedin.com/jobs/view/mergers-and-acquisitions-analyst-at-pure-search-2867691805?refId=asPCmJ9NFmRRCMwiHJtikw%3D%3D&trackingId=coTRlztgNMs7R0j0Ixlj7g%3D%3D&position=2&pageNum=0&trk=public_jobs_jserp-result_search-card 1211


INFO:li:scraper:('[][Worldwide][2]', 'Processed')


[ON_DATA] Intern, Sales (New York City) American Airlines 2022-01-07 https://www.linkedin.com/jobs/view/intern-sales-new-york-city-at-american-airlines-2871117272?refId=asPCmJ9NFmRRCMwiHJtikw%3D%3D&trackingId=Ps22YV1P5Rg9fK6jGMKD1w%3D%3D&position=3&pageNum=0&trk=public_jobs_jserp-result_search-card 4209


INFO:li:scraper:('[][Worldwide][3]', 'Processed')


[ON_DATA] Content Moderator, Pinterest TV (Contract) Pinterest 2022-01-08 https://www.linkedin.com/jobs/view/content-moderator-pinterest-tv-contract-at-pinterest-2867697048?refId=asPCmJ9NFmRRCMwiHJtikw%3D%3D&trackingId=ZMpXjReoMAFH0tRq58UZlA%3D%3D&position=4&pageNum=0&trk=public_jobs_jserp-result_search-card 1800


ERROR:li:scraper:('[][Worldwide][4]', JavascriptException("javascript error: Cannot read properties of null (reading 'scrollIntoView')\n  (Session info: headless chrome=96.0.4664.110)", None, ['Backtrace:', '\tOrdinal0 [0x00446903+2517251]', '\tOrdinal0 [0x003DF8E1+2095329]', '\tOrdinal0 [0x002E2848+1058888]', '\tOrdinal0 [0x002E4F44+1068868]', '\tOrdinal0 [0x002E4E0E+1068558]', '\tOrdinal0 [0x002E56BA+1070778]', '\tOrdinal0 [0x003364F9+1402105]', '\tOrdinal0 [0x003264D3+1336531]', '\tOrdinal0 [0x00335BBF+1399743]', '\tOrdinal0 [0x0032639B+1336219]', '\tOrdinal0 [0x003027A7+1189799]', '\tOrdinal0 [0x00303609+1193481]', '\tGetHandleVerifier [0x005D5904+1577972]', '\tGetHandleVerifier [0x00680B97+2279047]', '\tGetHandleVerifier [0x004D6D09+534521]', '\tGetHandleVerifier [0x004D5DB9+530601]', '\tOrdinal0 [0x003E4FF9+2117625]', '\tOrdinal0 [0x003E98A8+2136232]', '\tOrdinal0 [0x003E99E2+2136546]', '\tOrdinal0 [0x003F3541+2176321]', '\tBaseThreadInitThunk [0x75DAFA29+25]', '\tRtlGetAppContai

[ON_ERROR] Message: javascript error: Cannot read properties of null (reading 'scrollIntoView')
  (Session info: headless chrome=96.0.4664.110)
Stacktrace:
Backtrace:
	Ordinal0 [0x00446903+2517251]
	Ordinal0 [0x003DF8E1+2095329]
	Ordinal0 [0x002E2848+1058888]
	Ordinal0 [0x002E4F44+1068868]
	Ordinal0 [0x002E4E0E+1068558]
	Ordinal0 [0x002E56BA+1070778]
	Ordinal0 [0x003364F9+1402105]
	Ordinal0 [0x003264D3+1336531]
	Ordinal0 [0x00335BBF+1399743]
	Ordinal0 [0x0032639B+1336219]
	Ordinal0 [0x003027A7+1189799]
	Ordinal0 [0x00303609+1193481]
	GetHandleVerifier [0x005D5904+1577972]
	GetHandleVerifier [0x00680B97+2279047]
	GetHandleVerifier [0x004D6D09+534521]
	GetHandleVerifier [0x004D5DB9+530601]
	Ordinal0 [0x003E4FF9+2117625]
	Ordinal0 [0x003E98A8+2136232]
	Ordinal0 [0x003E99E2+2136546]
	Ordinal0 [0x003F3541+2176321]
	BaseThreadInitThunk [0x75DAFA29+25]
	RtlGetAppContainerNamedObjectPath [0x77C47A9E+286]
	RtlGetAppContainerNamedObjectPath [0x77C47A6E+238]

Traceback (most recent call last):
  

INFO:li:scraper:('[][Worldwide][4]', 'Processed')


[ON_DATA] Customer Service Coordinator Part Time American Airlines 2022-01-08 https://www.linkedin.com/jobs/view/customer-service-coordinator-part-time-at-american-airlines-2871115446?refId=asPCmJ9NFmRRCMwiHJtikw%3D%3D&trackingId=spQxtNX8LbuIo1lqobIIIg%3D%3D&position=6&pageNum=0&trk=public_jobs_jserp-result_search-card 4365


INFO:li:scraper:('[][Worldwide][5]', 'Processed')


[ON_DATA] Part-time Executive Virtual Assistant Team Delegate, LLC 2022-01-08 https://www.linkedin.com/jobs/view/part-time-executive-virtual-assistant-at-team-delegate-llc-2871122255?refId=asPCmJ9NFmRRCMwiHJtikw%3D%3D&trackingId=16tXMN%2F1h3W9HL2Ry6tmVg%3D%3D&position=7&pageNum=0&trk=public_jobs_jserp-result_search-card 1452


INFO:li:scraper:('[][Worldwide][6]', 'Processed')


[ON_DATA] Customer Assistance Representative Part Time American Airlines 2022-01-08 https://www.linkedin.com/jobs/view/customer-assistance-representative-part-time-at-american-airlines-2871115445?refId=asPCmJ9NFmRRCMwiHJtikw%3D%3D&trackingId=Vq4eLDmYm%2FWCN8Cn6yreOA%3D%3D&position=8&pageNum=0&trk=public_jobs_jserp-result_search-card 4285


INFO:li:scraper:('[][Worldwide][7]', 'Processed')


[ON_DATA] Account Manager American Airlines 2022-01-08 https://www.linkedin.com/jobs/view/account-manager-at-american-airlines-2871112739?refId=asPCmJ9NFmRRCMwiHJtikw%3D%3D&trackingId=5LAiLg46A4q%2FG8G%2FiKwj9w%3D%3D&position=9&pageNum=0&trk=public_jobs_jserp-result_search-card 4018


ERROR:li:scraper:('[][Worldwide][8]', JavascriptException("javascript error: Cannot read properties of null (reading 'scrollIntoView')\n  (Session info: headless chrome=96.0.4664.110)", None, ['Backtrace:', '\tOrdinal0 [0x00446903+2517251]', '\tOrdinal0 [0x003DF8E1+2095329]', '\tOrdinal0 [0x002E2848+1058888]', '\tOrdinal0 [0x002E4F44+1068868]', '\tOrdinal0 [0x002E4E0E+1068558]', '\tOrdinal0 [0x002E56BA+1070778]', '\tOrdinal0 [0x003364F9+1402105]', '\tOrdinal0 [0x003264D3+1336531]', '\tOrdinal0 [0x00335BBF+1399743]', '\tOrdinal0 [0x0032639B+1336219]', '\tOrdinal0 [0x003027A7+1189799]', '\tOrdinal0 [0x00303609+1193481]', '\tGetHandleVerifier [0x005D5904+1577972]', '\tGetHandleVerifier [0x00680B97+2279047]', '\tGetHandleVerifier [0x004D6D09+534521]', '\tGetHandleVerifier [0x004D5DB9+530601]', '\tOrdinal0 [0x003E4FF9+2117625]', '\tOrdinal0 [0x003E98A8+2136232]', '\tOrdinal0 [0x003E99E2+2136546]', '\tOrdinal0 [0x003F3541+2176321]', '\tBaseThreadInitThunk [0x75DAFA29+25]', '\tRtlGetAppContai

[ON_ERROR] Message: javascript error: Cannot read properties of null (reading 'scrollIntoView')
  (Session info: headless chrome=96.0.4664.110)
Stacktrace:
Backtrace:
	Ordinal0 [0x00446903+2517251]
	Ordinal0 [0x003DF8E1+2095329]
	Ordinal0 [0x002E2848+1058888]
	Ordinal0 [0x002E4F44+1068868]
	Ordinal0 [0x002E4E0E+1068558]
	Ordinal0 [0x002E56BA+1070778]
	Ordinal0 [0x003364F9+1402105]
	Ordinal0 [0x003264D3+1336531]
	Ordinal0 [0x00335BBF+1399743]
	Ordinal0 [0x0032639B+1336219]
	Ordinal0 [0x003027A7+1189799]
	Ordinal0 [0x00303609+1193481]
	GetHandleVerifier [0x005D5904+1577972]
	GetHandleVerifier [0x00680B97+2279047]
	GetHandleVerifier [0x004D6D09+534521]
	GetHandleVerifier [0x004D5DB9+530601]
	Ordinal0 [0x003E4FF9+2117625]
	Ordinal0 [0x003E98A8+2136232]
	Ordinal0 [0x003E99E2+2136546]
	Ordinal0 [0x003F3541+2176321]
	BaseThreadInitThunk [0x75DAFA29+25]
	RtlGetAppContainerNamedObjectPath [0x77C47A9E+286]
	RtlGetAppContainerNamedObjectPath [0x77C47A6E+238]

Traceback (most recent call last):
  

INFO:li:scraper:('[][Worldwide][8]', 'Processed')


[ON_DATA] Medical Economics Analyst CVS Health 2022-01-08 https://www.linkedin.com/jobs/view/medical-economics-analyst-at-cvs-health-2871106328?refId=asPCmJ9NFmRRCMwiHJtikw%3D%3D&trackingId=oqOm3FD1c9MoSIBNScdESA%3D%3D&position=11&pageNum=0&trk=public_jobs_jserp-result_search-card 3907


