In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import json

Load data

In [2]:
folders = [
    "2023-04-14-job-search-location-DC",
    "2023-04-14-job-search-location-USA",
]


def extract_jobs_results(file_path):
    with open(file_path) as f:
        data = json.load(f)
    return data["jobs_results"]


all_jobs_results = []
for folder in folders:
    for file in os.listdir(folder):
        if file.endswith(".json"):
            file_path = os.path.join(folder, file)
            try:
                jobs_results = extract_jobs_results(file_path)
                all_jobs_results.extend(jobs_results)
            except:
                print("no job results found")

df = pd.DataFrame(all_jobs_results)

no job results found
no job results found


In [3]:
df.head()

Unnamed: 0,title,company_name,location,via,description,job_highlights,related_links,extensions,detected_extensions,job_id
0,Ethereum Blockchain Developer (Remote),Ex Populus,Anywhere,via Built In,Company Overview:\nEx Populus is a cutting-edg...,"[{'title': 'Qualifications', 'items': ['2-3 ye...",[{'link': 'https://www.google.com/search?hl=en...,"[Work from home, Full-time, No degree mentioned]","{'schedule_type': 'Full-time', 'work_from_home...",eyJqb2JfdGl0bGUiOiJFdGhlcmV1bSBCbG9ja2NoYWluIE...
1,Blockchain Engineer,21.co,"New York, NY",via Greenhouse,We are seeking a highly motivated and skilled ...,"[{'title': 'Qualifications', 'items': ['Bachel...",[{'link': 'https://www.google.com/search?hl=en...,[Full-time],{'schedule_type': 'Full-time'},eyJqb2JfdGl0bGUiOiJCbG9ja2NoYWluIEVuZ2luZWVyIi...
2,Blockchain Course Instructor,Blockchain Institute of Technology,Anywhere,via LinkedIn,"Are you a blockchain, cryptocurrency, NFT, Met...","[{'title': 'Qualifications', 'items': ['3+ yea...",[{'link': 'https://www.google.com/search?hl=en...,"[24 hours ago, Work from home, Contractor, No ...","{'posted_at': '24 hours ago', 'schedule_type':...",eyJqb2JfdGl0bGUiOiJCbG9ja2NoYWluIENvdXJzZSBJbn...
3,Python based - Blockchain developer to join ex...,Upwork,Anywhere,via Upwork,Need someone to join our existing team to spee...,"[{'title': 'Qualifications', 'items': ['Candid...","[{'link': 'http://www.elance.com/', 'text': 'e...","[2 days ago, 10–30 an hour, Work from home, Co...","{'posted_at': '2 days ago', 'schedule_type': '...",eyJqb2JfdGl0bGUiOiJQeXRob24gYmFzZWQgLSBCbG9ja2...
4,Blockchain DevOps Engineer (Remote),Telnyx,United States,via Startup Jobs,"About Telnyx\n\nAt Telnyx, we’re architecting ...","[{'title': 'Qualifications', 'items': ['You ar...","[{'link': 'http://telnyx.com/', 'text': 'telny...","[4 days ago, Full-time, No degree mentioned]","{'posted_at': '4 days ago', 'schedule_type': '...",eyJqb2JfdGl0bGUiOiJCbG9ja2NoYWluIERldk9wcyBFbm...


From the 85 searches, we now have 823 job results in total. There are `title`, `location`, `description`, `job_highlights`, `extensions` and `detected_extensions` in the dataframe. And the `job_id` and `related_link` are not the ones we care about, so I will first remove them from the dataframe.

In [12]:
df.shape

(823, 10)

In [4]:
df.columns

Index(['title', 'company_name', 'location', 'via', 'description',
       'job_highlights', 'related_links', 'extensions', 'detected_extensions',
       'job_id'],
      dtype='object')

In [6]:
df_new = df[['title', 'company_name', 'location', 'via', 'description',
       'job_highlights', 'extensions', 'detected_extensions']]

First I want to look at the `via` column and see which platforms are helpful when we want to look for a job.

In [11]:
df_new['via'].apply(lambda x: x.startswith("via ")).sum()

823

In [15]:
df_new['platform'] = df_new['via'].apply(lambda x:x[4:])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_new['platform'] = df_new['via'].apply(lambda x:x[4:])


Since all the value from `via` column starts with "via ". I remove the first four digits from each value and create a new column `platform` in `df_new`.

In [27]:
import seaborn as sns
import plotly.express as px

In [33]:
platform_counts = df_new["platform"].value_counts().reset_index()
platform_counts.columns = ["platform", "count"]
platform_counts = platform_counts[platform_counts['count']>10]

fig1 = px.bar(
    platform_counts,
    x="count",
    y="platform",
    orientation="h",
    hover_name="platform",
    hover_data={"platform": False, "count": True},
    # text="count",
    color="count",
    color_continuous_scale=["#7ABAFF", "#0077B5"],
    labels={"count": "Count", "platform": "Platform"},
    title="Job Platforms Appearing More than 10 Times",
)

fig1.update_layout(
    title=dict(x=0.5, xanchor="center"),
    xaxis=dict(title="Count"),
    yaxis=dict(title="Platform", categoryorder="total ascending"),
    coloraxis=dict(colorbar=dict(title="Count")),
)

fig1.show()


Now I only have the bar chart. With no surprise, LinkedIn appears most. But ZipRecruiter and Upwork is what I haven't heard before. Maybe in specific greater DC area the job platforms are slightly different from what are popular in the whole country. Or the job types are different. We'll find out.

In [34]:
df_new.head()

Unnamed: 0,title,company_name,location,via,description,job_highlights,extensions,detected_extensions,platform
0,Ethereum Blockchain Developer (Remote),Ex Populus,Anywhere,via Built In,Company Overview:\nEx Populus is a cutting-edg...,"[{'title': 'Qualifications', 'items': ['2-3 ye...","[Work from home, Full-time, No degree mentioned]","{'schedule_type': 'Full-time', 'work_from_home...",Built In
1,Blockchain Engineer,21.co,"New York, NY",via Greenhouse,We are seeking a highly motivated and skilled ...,"[{'title': 'Qualifications', 'items': ['Bachel...",[Full-time],{'schedule_type': 'Full-time'},Greenhouse
2,Blockchain Course Instructor,Blockchain Institute of Technology,Anywhere,via LinkedIn,"Are you a blockchain, cryptocurrency, NFT, Met...","[{'title': 'Qualifications', 'items': ['3+ yea...","[24 hours ago, Work from home, Contractor, No ...","{'posted_at': '24 hours ago', 'schedule_type':...",LinkedIn
3,Python based - Blockchain developer to join ex...,Upwork,Anywhere,via Upwork,Need someone to join our existing team to spee...,"[{'title': 'Qualifications', 'items': ['Candid...","[2 days ago, 10–30 an hour, Work from home, Co...","{'posted_at': '2 days ago', 'schedule_type': '...",Upwork
4,Blockchain DevOps Engineer (Remote),Telnyx,United States,via Startup Jobs,"About Telnyx\n\nAt Telnyx, we’re architecting ...","[{'title': 'Qualifications', 'items': ['You ar...","[4 days ago, Full-time, No degree mentioned]","{'posted_at': '4 days ago', 'schedule_type': '...",Startup Jobs
