# <font color='#2F4F4F'>AfterWork Data Science: Web Scraping with Python</font>

## <font color='#2F4F4F'>Prerequisites</font>

In [1]:
# We first import the required libraries
# ---
#
import pandas as pd             # library for data manupation
import requests                 # library for fetching a web page 
from bs4 import BeautifulSoup   # library for extrating contents from a webpage 

## <font color='#2F4F4F'>Step 1: Obtaining our Data</font>

In [2]:
# PigiaMe: https://www.pigiame.co.ke/it-software-jobs
# ---
#
pigia_me = requests.get('https://www.pigiame.co.ke/it-software-jobs')
pigia_me

<Response [200]>

In [3]:
# MyJobMag: https://www.myjobmag.co.ke/jobs-by-field/information-technology
# ---
#
my_job_mag = requests.get('https://www.myjobmag.co.ke/jobs-by-field/information-technology')
my_job_mag

<Response [200]>

In [4]:
# KenyanJob: https://www.kenyajob.com/job-vacancies-search-kenya?f%5B0%5D=im_field_offre_secteur%3A133
# ---
#
kenyan_job = requests.get('https://www.kenyajob.com/job-vacancies-search-kenya?f%5B0%5D=im_field_offre_secteur%3A133')
kenyan_job

<Response [200]>

## <font color='#2F4F4F'>Step 2: Parsing</font>

In [77]:
# Parsing our document: pigia_me
# ---
# 
pigia_me_soup = BeautifulSoup(pigia_me.text, "html.parser")

In [75]:
# Parsing our document: my_job_mag
# ---
#  
my_job_mag_soup = BeautifulSoup(my_job_mag.text, "html.parser")

In [76]:
# Parsing our document: kenyan_job
# ---
# 
kenya_job_soup = BeautifulSoup(kenyan_job.content, "html.parser")

## <font color='#2F4F4F'>Step 3: Extracting Required Elements</font>

In [78]:
# 1. Extracting job titles and links: pigia me
# ---
# 
# Target tags: 
# <div> tags contained in <div class="listing-cards">

#pigia_me_tags = pigia_me_soup.find('div', attrs={'class':'listings-cards__list'})
pigia_me_tags = pigia_me_soup.find('div', attrs={'class':'search__content'})
title_tags = pigia_me_tags.find_all('div', attrs={'class':'listing-card__header__title'})
link_tags = pigia_me_tags.find_all('a', attrs={'class':'listing-card__inner', 'data-t-listing_context':'search'}, href=True)
pigia_me_titles = [tag.text.strip() for tag in title_tags]
pigia_me_links = [tag['href'].strip() for tag in link_tags]
#links,titles
#link_tags[0]['href']

In [79]:
# 2. Extracting job titles: my_job_mag
# ---
# 
# Target tags: 
# <a> and <li> tags contained in <ul class="job-list">

myjob_mag_tags = my_job_mag_soup.find('ul', attrs={'class':'job-list'})
title_tags = myjob_mag_tags.find_all('a') #, attrs={'class':'listing-card__header__title'})
link_tags = myjob_mag_tags.find_all('a', href=True) #, attrs={'class':'listing-card__inner', 'data-t-listing_context':'search'}, href=True)
my_job_mag_titles = [tag.text.strip() for tag in title_tags]
my_job_mag_links = ["https://www.myjobmag.co.ke" + tag['href'].strip() for tag in link_tags]
#links,titles

In [80]:
# 3. Extracting job titles: kenya_job
# ---
#
# Target tags: 
# <div> and <h5> tags contained in <div class="jobsearch-search-results-box">

kenya_job_tags = kenya_job_soup.find('div', attrs={'id':'jobsearch-search-results-box'})
title_tags = kenya_job_tags.find_all('h5')
link_tags = kenya_job_tags.find_all('div', attrs={'class':'job-description-wrapper'}) #, attrs={'class':'listing-card__inner', 'data-t-listing_context':'search'}, href=True)

kenya_job_titles = [tag.text.strip() for tag in title_tags]
kenya_job_links = [tag.get('data-href') for tag in link_tags]
#titles,links

## <font color='#2F4F4F'>Step 4: Saving our Data</font>

In [87]:
# Saving the scraped contents in a dataframe and preview our data
# ---
#
# Pigia me  dtaframe
df_pigia_me = pd.DataFrame({'job_title':pigia_me_titles, 'job_link':pigia_me_links})
df_my_job_mag = pd.DataFrame({'job_title':my_job_mag_titles, 'job_link':my_job_mag_links})
df_kenya_job = pd.DataFrame({'job_title':kenya_job_titles, 'job_link':kenya_job_links})

# Concatenate the dataframes
final_df = pd.concat([df_pigia_me, df_my_job_mag, df_kenya_job], axis=0)
final_df

Unnamed: 0,job_title,job_link
0,Assistant IT Administrator,https://www.pigiame.co.ke/listings/assistant-i...
1,Frontend Developer,https://www.pigiame.co.ke/listings/frontend-de...
2,CRM Enginee,https://www.pigiame.co.ke/listings/crm-enginee...
3,Analyst Programmer,https://www.pigiame.co.ke/listings/analyst-pro...
4,Senior Game Developer,https://www.pigiame.co.ke/listings/senior-game...
...,...,...
20,DevOps Engineer (M/F),https://www.kenyajob.com/job-vacancies-kenya/d...
21,DOTNET Developer (M/F),https://www.kenyajob.com/job-vacancies-kenya/d...
22,IOS Developer (M/F),https://www.kenyajob.com/job-vacancies-kenya/i...
23,JAVA Developer (M/F),https://www.kenyajob.com/job-vacancies-kenya/j...
