# Analysis of Channels

In [1]:
# import data and libraries
import pandas as pd

# Load the cleaned, combined job-postings table from the sibling project folder
DATA_PATH = '../2023-04-14-job-search/Clean_Data/combined_data_final.csv'
df = pd.read_csv(DATA_PATH)

# Preview the first five rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column, presumably an index
# that was written into the CSV -- confirm, and consider index_col=0 if so.
df.head()

Unnamed: 0,title,company_name,location,via,description,detected_extensions.schedule_type,detected_extensions.work_from_home,detected_extensions.posted_at,detected_extensions.salary,search_parameters.q,Qualifications,Responsibilities,Benefits
0,Ethereum Blockchain Developer (Remote),Ex Populus,Anywhere,via Built In,Company Overview:\nEx Populus is a cutting-edg...,Full-time,True,,,block chain,"2-3 years of Software Development experience,1...","Design, maintain and deploy smart contracts fo...",
1,Blockchain Engineer,21.co,"New York, NY",via Greenhouse,We are seeking a highly motivated and skilled ...,Full-time,,,,block chain,Bachelor's or Master's degree in Computer Scie...,"As a Blockchain Engineer, you will be responsi...",(NYC only) Pursuant to Section 8-102 of title ...
2,Blockchain Course Instructor,Blockchain Institute of Technology,Anywhere,via LinkedIn,"Are you a blockchain, cryptocurrency, NFT, Met...",Contractor,True,24 hours ago,,block chain,"3+ years of experience in blockchain, cryptocu...",Our expert technical team will provide the sup...,
3,Python based - Blockchain developer to join ex...,Upwork,Anywhere,via Upwork,Need someone to join our existing team to spee...,Contractor,True,2 days ago,10–30 an hour,block chain,"Candidates must be willing to sign, non-disclo...",Will discuss details with the selected candidates,
4,Blockchain DevOps Engineer (Remote),Telnyx,United States,via Startup Jobs,"About Telnyx\n\nAt Telnyx, we’re architecting ...",Full-time,,4 days ago,,block chain,You are a highly motivated and experienced Blo...,To build a best-in-class Filecoin (FIL) Mining...,


## Histogram of Channels


In [2]:
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
# Set the default plotly renderer for every fig.show() in this notebook; the
# '+' joins two renderer names ("plotly_mimetype" and "notebook_connected").
pio.renderers.default = "plotly_mimetype+notebook_connected"

In [9]:
# Number of postings per source channel (the 'via' column), most frequent first
df['via'].value_counts()

via LinkedIn                                   51
via ZipRecruiter                               45
via Upwork                                     38
via AngelList                                  24
via Lever                                      15
                                               ..
via Careers At Chime                            1
via Built In Seattle                            1
via Eightfold.ai                                1
via Fidelity Careers - Fidelity Investments     1
via WDHN Jobs                                   1
Name: via, Length: 219, dtype: int64

In [26]:
# Build the per-channel job counts and keep the ten largest channels.
# groupby(...).count() leaves, in every column, the number of non-null entries
# for that channel, so the 'title' column doubles as the job count here.
df_channel = (
    df.groupby('via')
    .count()
    .reset_index()
    .sort_values(by='title', ascending=False)
    .head(10)
)

# Bar chart: one bar per channel, coloured by channel, with the count as label
fig = px.bar(
    df_channel,
    x='via',
    y='title',
    color='via',
    text='title',
    title='Number of Jobs per Channel',
    labels={'via': 'Channel', 'title': 'Number of Jobs'},
)

# Draw the count above each bar.
# NOTE(review): '.2s' keeps two significant digits, so a count of 100 or more
# would be shown rounded -- confirm this is acceptable if the data grows.
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')

# NOTE(review): the y-axis range is fixed at 0-60; it fits the current maximum
# (51 postings) but would clip taller bars.
fig.update_layout(
    xaxis_title='Channel',
    yaxis_title='Number of Jobs',
    yaxis_range=[0, 60],
)

fig.show()

In [29]:
# Save the bar chart to an HTML file.
# NOTE(review): `fig` is whichever figure was assigned last, and the pie-chart
# cell further down reuses the name `fig` -- run this cell right after the
# bar-chart cell so the correct figure is written to Channel_bar.html.
fig.write_html("../Mini-Data-Viz-Project/images/Channel_bar.html")

## Pie Chart of `detected_extensions.posted_at` for LinkedIn and ZipRecruiter Postings

In [43]:
# Keep only the postings that came through LinkedIn or ZipRecruiter.
# The previous filter list also contained 'Via Upwork' (capital "V"), which can
# never match the data's 'via Upwork' label, so it selected nothing; the dead
# entry is removed so the code states exactly which channels are analysed.
POSTED_AT_CHANNELS = ['via LinkedIn', 'via ZipRecruiter']
df_post_at = df[df['via'].isin(POSTED_AT_CHANNELS)]

# How long ago the selected postings were published (missing values are not counted)
df_post_at['detected_extensions.posted_at'].value_counts()

3 days ago      14
4 days ago      13
2 days ago       7
7 days ago       6
5 days ago       5
13 days ago      3
12 days ago      3
10 days ago      2
24 days ago      2
11 hours ago     2
24 hours ago     1
26 days ago      1
9 hours ago      1
23 hours ago     1
3 hours ago      1
1 day ago        1
15 hours ago     1
21 days ago      1
8 days ago       1
17 days ago      1
6 days ago       1
18 days ago      1
11 days ago      1
20 days ago      1
Name: detected_extensions.posted_at, dtype: int64

In [44]:
# Count postings per "posted at" label.
# The aggregate is stored under its own name instead of overwriting
# df_post_at: the original cell rebound df_post_at to the aggregated frame, so
# running the cell a second time re-aggregated the six-row result and produced
# a pie with six equal slices. With separate names the cell is idempotent and
# df_post_at stays the filtered postings frame.
posted_at_counts = (
    df_post_at.groupby('detected_extensions.posted_at')
    .count()
    .reset_index()
)

# select the top 6 posted_at labels ('title' holds the per-label count)
posted_at_top6 = posted_at_counts.sort_values(by='title', ascending=False).head(6)

# create a pie chart; the slices cover only these six labels, so the
# percentages are shares of the top six, not of all selected postings
fig = px.pie(
    posted_at_top6,
    values='title',
    names='detected_extensions.posted_at',
    title='Number of Jobs Posted at Different Times',
    labels={'detected_extensions.posted_at': 'Posted at', 'title': 'Number of Jobs'},
)

# Label each slice inside the pie, which makes the legend redundant
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(showlegend=False)

fig.show()

In [45]:
# save the figure to html
# fig.write_html("../Mini-Data-Viz-Project/images/Posted_at_pie.html")