In [7]:
import pandas as pd
import numpy as np
import pygsheets
from datetime import date
import re

In [8]:
def find_requirements(string):
    """Extract the requirements section of a job description.

    The section starts at the first occurrence of "require"/"Require" or
    "qualifications"/"Qualifications" (or at the beginning of the text when
    neither occurs) and ends at the first "benefits"/"Benefits" found at or
    after that starting point (or at the end of the text). The section is
    split on newlines, lines of five characters or fewer are discarded, the
    first remaining line (normally the heading that contained the keyword)
    is dropped, and the rest are joined with '$'.

    Parameters
    ----------
    string : str or any
        Raw job description. Anything that is not a non-empty ``str``
        (``None``, ``NaN`` from pandas, ``''``) yields an empty result.

    Returns
    -------
    str
        The requirement lines joined with '$', or '' when nothing is found.
    """
    # pandas hands NaN (a truthy float) to .apply() for missing descriptions,
    # so a plain truthiness check is not enough to protect re.search.
    if not isinstance(string, str) or not string:
        return ''

    heading = re.search("([rR]equire)|([Qq]ualifications)", string)
    start = heading.start() if heading else 0

    # Look for the closing keyword only from `start` onwards: a "benefits"
    # mention earlier in the text would otherwise put `end` before `start`
    # and produce an empty slice.
    closing = re.compile("[bB]enefits").search(string, start)
    end = closing.start() if closing else len(string)

    # Build a filtered list instead of removing while iterating, which skips
    # the element following every removal.
    kept_lines = [line for line in string[start:end].split('\n') if len(line) > 5]
    return '$'.join(kept_lines[1:])

In [9]:
def calculate_date(string):
    """Convert relative-age strings such as "3 hours ago" into minutes.

    Each entry is split on single spaces; the first token is parsed as an
    integer count and the second token is the unit. Months are counted as
    30 days and years as 365 days. An unrecognised unit falls back to the
    bare count (i.e. it is treated as minutes).

    The input is never modified; a new object is returned.

    Parameters
    ----------
    string : pandas.Series or iterable of str
        The relative-age strings. (The name is historical; it is a
        collection, not a single string.)

    Returns
    -------
    pandas.Series or list of int
        A Series with the same index when a Series is given (which makes the
        function usable as a ``key`` for ``sort_values``), otherwise a list.

    Raises
    ------
    ValueError
        If the first token of an entry is not an integer.
    IndexError
        If an entry has no second, space-separated token.
    """
    minutes_per_unit = {
        'minute': 1, 'minutes': 1,
        'hour': 60, 'hours': 60,
        'day': 60 * 24, 'days': 60 * 24,
        'week': 60 * 24 * 7, 'weeks': 60 * 24 * 7,
        'month': 60 * 24 * 30, 'months': 60 * 24 * 30,
        'year': 60 * 24 * 365, 'years': 60 * 24 * 365,
    }

    def to_minutes(text):
        tokens = text.split(' ')
        amount = int(tokens[0])
        unit = tokens[1]
        # Unknown units keep the raw number.
        return amount * minutes_per_unit.get(unit, 1)

    # Series.map works element-wise regardless of the index labels, whereas
    # `series[i]` is a label lookup and fails on any index that is not 0..n-1.
    if isinstance(string, pd.Series):
        return string.map(to_minutes)
    return [to_minutes(text) for text in string]

In [72]:
def _split_company_field(text):
    """Split one raw 'company' cell into (company, location, posted_date, applicants)."""
    parts = text.split('·')
    company, location_and_age, applicants_text = parts[0], parts[1], parts[2]

    # The posting age starts at the first digit of the middle segment.
    age_start = re.search(r"[0-9]+", location_and_age)
    if age_start:
        location = location_and_age[:age_start.start()]
        posted_date = location_and_age[age_start.start():]
    else:
        # No age present: keep the whole segment as the location.
        location, posted_date = location_and_age, ''

    # Keep only the digits instead of slicing a fixed number of characters
    # off the end, so thousands separators, stray whitespace or different
    # wording around the number do not matter.
    applicants = int(re.sub(r"[^0-9]", '', applicants_text))
    return company, location, posted_date, applicants


def parse_df(path='jobs.csv'):
    """Load the jobs CSV and return a cleaned table sorted by posting age.

    Steps:

    1. Keep only rows whose ``already_applied`` cell is empty.
    2. Drop the ``Unnamed: 0``, ``already_applied`` and ``applicant_count``
       columns (columns that are absent are ignored).
    3. Split the raw ``company`` cell on '·' into company, location plus
       posting age, and applicant count.
    4. Replace ``job_description`` by a ``requirements`` column computed
       with ``find_requirements``.
    5. Sort ascending by posting age using ``calculate_date`` as the key;
       rows for which no age was found are placed after the sorted rows.

    Parameters
    ----------
    path : str
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: job_title, linkedin_url, company,
        company_linkedin_url, location, posted_date, easy_apply,
        applicants, requirements, benefits.

    Raises
    ------
    IndexError
        If a ``company`` cell has fewer than three '·'-separated parts.
    ValueError
        If the applicant segment of a ``company`` cell contains no digits.
    """
    jobs = pd.read_csv(path)

    # An empty `already_applied` cell marks a posting not applied to yet.
    jobs = jobs[jobs['already_applied'].isna()]
    jobs = jobs.drop(
        columns=['Unnamed: 0', 'already_applied', 'applicant_count'],
        errors='ignore',
    )

    # Parse every raw company cell once and align the pieces on the row index.
    parsed = pd.DataFrame(
        [_split_company_field(text) for text in jobs['company']],
        columns=['company', 'location', 'posted_date', 'applicants'],
        index=jobs.index,
    )

    jobs = jobs.assign(
        company=parsed['company'],
        location=parsed['location'],
        posted_date=parsed['posted_date'],
        applicants=parsed['applicants'],
        requirements=jobs['job_description'].apply(find_requirements),
    ).drop(columns=['job_description'])

    # Only rows with an age can be fed to the sort key; the rest go last.
    has_date = jobs['posted_date'] != ''
    ordered = jobs[has_date].sort_values(by='posted_date', key=calculate_date)
    if not has_date.all():
        ordered = pd.concat([ordered, jobs[~has_date]])

    return ordered[['job_title','linkedin_url','company','company_linkedin_url','location','posted_date','easy_apply','applicants','requirements','benefits']]

In [73]:
def upload_df(df,path = 'C:/Users/Muggl/Desktop/linkedin-jobs-392523-00de46c464a4.json'):
    """Write ``df`` to today's worksheet of the 'daily_linkedin_jobs' spreadsheet.

    The worksheet is titled with today's date (``str(date.today())``). If a
    worksheet with that title already exists it is cleared and reused,
    otherwise a new one is added. The frame is written starting at cell A1,
    then column K gets the header "requirements_listed" and, for every data
    row, a SUBSTITUTE formula that turns the '$' separators of column I into
    line breaks.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to publish. The formulas assume the requirements text ends up
        in column I, which matches the column order returned by ``parse_df``.
    path : str
        Service-account JSON key file passed to ``pygsheets.authorize``. The
        default is a machine-specific absolute path; pass your own location
        when running elsewhere.
    """
    gc = pygsheets.authorize(service_account_file=path)
    sh = gc.open('daily_linkedin_jobs')

    # Resolve the title once so both branches refer to the same sheet even
    # if the call straddles midnight.
    sheet_title = str(date.today())
    try:
        current_sheet = sh.worksheet_by_title(sheet_title)
    except pygsheets.exceptions.WorksheetNotFound:
        # Use the worksheet returned by add_worksheet rather than assuming
        # the new sheet is the last entry of sh.worksheets().
        current_sheet = sh.add_worksheet(sheet_title)
    else:
        current_sheet.clear()

    current_sheet.set_dataframe(df, (1, 1))
    current_sheet.update_value("K1", "requirements_listed")

    # One formula per data row; data rows start at sheet row 2.
    formulas = [
        [f'=SUBSTITUTE(I{row},"$",CHAR(10))']
        for row in range(2, len(df) + 2)
    ]
    if formulas:
        # The last data row is len(df) + 1, so the range covers exactly
        # len(df) cells.
        current_sheet.update_values(f"K2:K{len(df)+1}", formulas, parse=True)

In [74]:
def main():
    """Run the pipeline end to end: parse the jobs CSV, then upload the table."""
    upload_df(parse_df())

In [75]:
# Build the job table from the default CSV and publish it; `job_df` is kept
# at module level so it can be inspected in the following cell.
job_df = parse_df()
upload_df(df=job_df)

In [76]:
# Display the parsed job table as the cell output.
job_df

Unnamed: 0,job_title,linkedin_url,company,company_linkedin_url,location,posted_date,easy_apply,applicants,requirements,benefits
10,Python Developer,https://www.linkedin.com/jobs/view/3655678689/...,Flexon Technologies Inc.,https://www.linkedin.com/company/flexon-techno...,"Cupertino, CA (On-site)",1 hour ago,Easy Apply,5,5+ years relevant job experience$Admin level e...,
34,Python Developer,https://www.linkedin.com/jobs/view/3660895686/...,Jobot,https://www.linkedin.com/company/jobot/,"Rockville, MD (On-site)",3 hours ago,Easy Apply,19,Create user-friendly and intuitive user interf...,Pay found in job post\nRetrieved from the desc...
20,Python Developer,https://www.linkedin.com/jobs/view/3660899423/...,Avenues International Inc.,https://www.linkedin.com/company/avenuesinc/,"Manhattan, NY (On-site)",3 hours ago,Easy Apply,32,Position Name: Senior Python Developer$Duratio...,"Employer-provided\nPay range in Manhattan, NY\..."
15,Python Developer,https://www.linkedin.com/jobs/view/3661135382/...,Open Systems Technologies,https://www.linkedin.com/company/open-systems-...,"New York, NY (Hybrid)",4 hours ago,Easy Apply,46,"A Bachelor's Degree in Computer Science, Engin...",Pay found in job post\nRetrieved from the desc...
64,"Software Engineer, Python - 1001",https://www.linkedin.com/jobs/view/3660869057/...,Reinventing Geospatial (RGi),https://www.linkedin.com/company/reinventing-g...,"St Louis, MO (Hybrid)",6 hours ago,Apply,0,Bachelor’s degree or equivalent experience$3+ ...,Benefits found in job post\nDental insurance\n...
...,...,...,...,...,...,...,...,...,...,...
9,AWS / Python Software Engineer,https://www.linkedin.com/jobs/view/3632641637/...,Videon,https://www.linkedin.com/company/videon-central/,United States (Remote),1 month ago,Apply,852,Proficiency with serverless Cloud architecture...,
16,Software Engineer,https://www.linkedin.com/jobs/view/3620277367/...,Obscurity Labs,https://www.linkedin.com/company/obscuritylabs/,"Chantilly, VA (On-site)",1 month ago,,384,Title: Software Engineer$Level: Intermediate S...,"Employer-provided\nPay range in Chantilly, VA\..."
23,Software Engineer (Python),https://www.linkedin.com/jobs/view/3594316824/...,"Harmonia Holdings Group, LLC",https://www.linkedin.com/company/harmonia-hold...,"McLean, VA (Hybrid) Reposted",1 month ago,,991,Essential Job Functions:$Knowledge and working...,Featured benefits\nDental insurance\nVision in...
49,Python Developer,https://www.linkedin.com/jobs/view/3582074435/...,Synechron,https://www.linkedin.com/company/synechron/,"Charlotte, NC (Hybrid) Reposted",2 months ago,Easy Apply,386,An aspiring quant developer must learn a codin...,
