# **Collecting Job Data Using APIs**


#### Instructions


## Warm-Up Exercise


In [4]:
import requests 
import pandas as pd

In [6]:
api_url = "http://api.open-notify.org/astros.json" # this url gives use the astronaut data

In [7]:
response = requests.get(api_url) # Call the API using the get method and store the
                                # output of the API call in a variable called response.

In [8]:
if response.ok:             # if all is well() no errors, no network timeouts)
    data = response.json()  # store the result in json format in a variable called data
                            # the variable data is of type dictionary.

In [9]:
print(data)   # print the data just to check the output or for debugging

{'people': [{'craft': 'ISS', 'name': 'Oleg Kononenko'}, {'craft': 'ISS', 'name': 'Nikolai Chub'}, {'craft': 'ISS', 'name': 'Tracy Caldwell Dyson'}, {'craft': 'ISS', 'name': 'Matthew Dominick'}, {'craft': 'ISS', 'name': 'Michael Barratt'}, {'craft': 'ISS', 'name': 'Jeanette Epps'}, {'craft': 'ISS', 'name': 'Alexander Grebenkin'}, {'craft': 'ISS', 'name': 'Butch Wilmore'}, {'craft': 'ISS', 'name': 'Sunita Williams'}, {'craft': 'Tiangong', 'name': 'Li Guangsu'}, {'craft': 'Tiangong', 'name': 'Li Cong'}, {'craft': 'Tiangong', 'name': 'Ye Guangfu'}], 'number': 12, 'message': 'success'}


Print the number of astronauts currently on ISS.


In [10]:
print(data.get('number'))

12


Print the names of the astronauts currently on ISS.


In [7]:
astronauts = data.get('people')
print("There are {} astronauts on ISS".format(len(astronauts)))
print("And their names are :")
for astronaut in astronauts:
    print(astronaut.get('name'))

There are 12 astronauts on ISS
And their names are :
Oleg Kononenko
Nikolai Chub
Tracy Caldwell Dyson
Matthew Dominick
Michael Barratt
Jeanette Epps
Alexander Grebenkin
Butch Wilmore
Sunita Williams
Li Guangsu
Li Cong
Ye Guangfu


## Lab: Collect Jobs Data using GitHub Jobs API


### Objective: Determine the number of jobs currently open for various technologies  and for various locations


Collect the number of job postings for the following locations using the API:

* Los Angeles
* New York
* San Francisco
* Washington DC
* Seattle
* Austin
* Detroit


In [1]:
#Import required libraries
import pandas as pd
import json
import requests

#### Write a function to get the number of jobs for the Python technology.<br>
>
  
 ##### The keys in the json are 
 * Job Title
 
 * Job Experience Required
 
 * Key Skills
 
 * Role Category
 
 * Location
 
 * Functional Area
 
 * Industry
 
 * Role 
 
You can also view  the json file contents  from the following <a href = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DA0321EN-SkillsNetwork/labs/module%201/Accessing%20Data%20Using%20APIs/jobs.json">json</a> URL.



In [3]:
import requests

api_url = "http://127.0.0.1:5000/data"

def get_number_of_jobs_T(technology):
    # Gửi yêu cầu đến API với tham số 'Key Skills'
    payload = {"Key Skills": technology}
    response = requests.get(api_url, params=payload)
    
    # Kiểm tra xem yêu cầu có thành công không
    if response.status_code == 200:
        # Chuyển đổi dữ liệu JSON thành danh sách Python
        jobs_data = response.json()
        
        # Đếm số công việc
        number_of_jobs = len(jobs_data)  # Số lượng công việc trả về
        return technology, number_of_jobs
    else:
        print(f"Lỗi: {response.status_code} - Không thể lấy dữ liệu từ API.")
        return technology, 0  # Trả về 0 nếu không thể lấy dữ liệu

# Ví dụ gọi hàm
technology = "Python"
result = get_number_of_jobs_T(technology)
print(f"Công nghệ: {result[0]}, Số lượng công việc: {result[1]}")

Công nghệ: Python, Số lượng công việc: 1173


Calling the function for Python and checking if it works.


In [4]:
get_number_of_jobs_T("C#")

('C#', 333)

#### Write a function to find number of jobs in US for a location of your choice


In [5]:
def get_number_of_jobs_L(location):
    payload = {"Location": location}
    response = requests.get(api_url, params=payload)
    
    # Kiểm tra xem yêu cầu có thành công không
    if response.status_code == 200:
        # Chuyển đổi dữ liệu JSON thành danh sách Python
        jobs_data = response.json()
        
        # Đếm số công việc
        number_of_jobs = len(jobs_data)  # Số lượng công việc trả về
        return location, number_of_jobs
    else:
        print(f"Lỗi: {response.status_code} - Không thể lấy dữ liệu từ API.")
        return location, 0  # Trả về 0 nếu không thể lấy dữ liệu
    return location,number_of_jobs

Call the function for Los Angeles and check if it is working.


In [6]:

location = "Los Angeles"
result = get_number_of_jobs_L(location)
print(f"Location: {result[0]}, Số lượng công việc: {result[1]}")

Location: Los Angeles, Số lượng công việc: 640


### Store the results in an excel file


Call the API for all the given technologies above and write the results in an excel spreadsheet.


If you do not know how create excel file using python, double click here for **hints**.

<!--

from openpyxl import Workbook        # import Workbook class from module openpyxl
wb=Workbook()                        # create a workbook object
ws=wb.active                         # use the active worksheet
ws.append(['Country','Continent'])   # add a row with two columns 'Country' and 'Continent'
ws.append(['Eygpt','Africa'])        # add a row with two columns 'Egypt' and 'Africa'
ws.append(['India','Asia'])          # add another row
ws.append(['France','Europe'])       # add another row
wb.save("countries.xlsx")            # save the workbook into a file called countries.xlsx


-->


Create a python list of all technologies for which you need to find the number of jobs postings.


In [26]:

technologies = [
    "Python",
    "Java",
    "C#",
    "JavaScript",
    "Oracle",
    "C++",
    "C",
    "Scala"
]

Import libraries required to create excel spreadsheet


In [7]:
!pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-1.1.0-py3-none-any.whl.metadata (1.8 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.5


In [27]:

import requests
from openpyxl import Workbook

Create a workbook and select the active worksheet


In [28]:

# Create a new Excel workbook and select the active worksheet
workbook = Workbook()
sheet = workbook.active

Find the number of jobs postings for each of the technology in the above list.
Write the technology name and the number of jobs postings into the excel spreadsheet.


In [29]:

# Write the headers
sheet['A1'] = 'Technology'
sheet['B1'] = 'Number of Job Postings'

# Populate the sheet with technology and job counts
for index, tech in enumerate(technologies, start=2):
    job_count = get_number_of_jobs_T(tech)
    sheet[f'A{index}'] = tech
    sheet[f'B{index}'] = job_count[1]

Save into an excel spreadsheet named 'github-job-postings.xlsx'.


In [30]:

# Save the workbook to an Excel file
workbook.save('github-job-postings.xlsx')

print("Excel spreadsheet 'github-job-postings.xlsx' has been created successfully.")

Excel spreadsheet 'github-job-postings.xlsx' has been created successfully.


Collect the number of job postings for the following Language Programs using the API:

*   C
*   C#
*   C++
*   Java
*   JavaScript
*   Python
*   Scala
*   Oracle
*   SQL Server
*   MySQL Server
*   PostgreSQL
*   MongoDB


In [8]:

import requests
from openpyxl import Workbook

# Define the list of programming languages
languages = [
    "C",
    "C#",
    "C++",
    "Java",
    "JavaScript",
    "Python",
    "Scala",
    "Oracle",
    "SQL Server",
    "MySQL Server",
    "PostgreSQL",
    "MongoDB"
]

# Function to get the number of job postings for a given language
def get_number_of_jobs_T(language):
    api_url = "http://127.0.0.1:5000/data"
    params = {'Key Skills': language}
    response = requests.get(api_url, params=params)
    
    if response.status_code == 200:
        jobs_data = response.json()
        return len(jobs_data)  # Return the count of job postings
    else:
        print(f"Error: {response.status_code} - Cannot retrieve data for {language}.")
        return 0

# Create a new Excel workbook and select the active worksheet
workbook = Workbook()
sheet = workbook.active

# Write the headers
sheet['A1'] = 'Programming Language'
sheet['B1'] = 'Number of Job Postings'

# Populate the sheet with language and job counts
for index, lang in enumerate(languages, start=2):
    job_count = get_number_of_jobs_T(lang)
    sheet[f'A{index}'] = lang
    sheet[f'B{index}'] = job_count

# Save the workbook to an Excel file
workbook.save('language-job-postings-2.xlsx')

print("successfully.")

Excel spreadsheet 'language-job-postings.xlsx' has been created successfully.


Find the maximum number of job postings by Location

In [12]:

import requests
from openpyxl import Workbook

# Define the list of programming languages
L= ["Los Angeles", "New York", "San Francisco", "Washington DC", "Seattle"]

# Function to get the number of job postings for a given language
def get_number_of_jobs_L(L):
    api_url = "http://127.0.0.1:5000/data"
    params = {'Location': L}
    response = requests.get(api_url, params=params)
    
    if response.status_code == 200:
        jobs_data = response.json()
        return len(jobs_data)  # Return the count of job postings
    else:
        print(f"Error: {response.status_code} - Cannot retrieve data for {L}.")
        return 0

# Create a new Excel workbook and select the active worksheet
workbook = Workbook()
sheet = workbook.active

# Write the headers
sheet['A1'] = 'Location'
sheet['B1'] = 'Number of Job Postings'

# Populate the sheet with language and job counts
for index, locate in enumerate(L, start=2):
    job_count = get_number_of_jobs_L(locate)
    sheet[f'A{index}'] = locate
    sheet[f'B{index}'] = job_count

# Save the workbook to an Excel file
workbook.save('location-job-postings.xlsx')

print("successfully.")

successfully.


<!--## Change Log


<!--| Date (YYYY-MM-DD) | Version | Changed By        | Change Description                 |
| ----------------- | ------- | ----------------- | ---------------------------------- | 
| 2022-01-19        | 0.3     | Lakshmi Holla        | Added changes in the markdown      |
| 2021-06-25        | 0.2     | Malika            | Updated GitHub job json link       |
| 2020-10-17        | 0.1     | Ramesh Sannareddy | Created initial version of the lab |--!>
