# Process LinkedIn Data

In [1]:
import pandas as pd

In [2]:
def import_file(file_name, url):
    try:
        df = pd.read_csv(url + file_name)
        return df
    except FileNotFoundError:
        print(f"Missing {file_name} file.")

In [3]:
def linkedin_data(folder_name):
    # Loading files
    url = f'../data/linkedin/{folder_name}/'
    
    skills_data = import_file('Skills.csv', url)
    positions_data = import_file('Positions.csv', url)
    profile_data = import_file('Profile.csv', url)
    education_data = import_file('Education.csv', url)

    # Extracting information
    try:
        full_name = profile_data['First Name'] + " " + profile_data['Last Name']
        summary = profile_data['Summary']
    except:
        print("Profile columns could not be created.")

    try:
        position_list = positions_data['Title'].values.tolist()
        position_string = ', '.join([str(position) for position in position_list])
    except:
        print("Positions columns could not be created.")
    
    try:
        skills_list = skills_data.values.tolist()
        skills_string = ', '.join([str(skill) for skill in skills_list])
        skills_string = skills_string.replace("['", "").replace("']", "")
    except:
        print("Skills columns could not be created.")

    try:
        education_list = education_data['Degree Name'].values.tolist()
        education_string = ', '.join([str(education) for education in education_list])
    except:
        print("Education columns could not be created.")

    try:
        jobtype_list = jobpref_data['Preferred Job Types'].values.tolist()
        jobtype_string = ', '.join([str(jtype) for jtype in jobtype_list])
        jobtype_string = jobtype_string.replace(" | ", ", ")
        
        prefjob_list = jobpref_data['Job Titles'].values.tolist()
        prefjob_string = ', '.join([str(pjob) for pjob in prefjob_list])
        prefjob_string = prefjob_string.replace(" | ", ", ")
    except:
        print("Job preference columns could not be created.")
    
    # Building dataframe
    data_df = {}
    
    try: data_df['Name'] = full_name
    except: print('Missing name.')
    
    try: data_df['Titles'] = position_string
    except: print('Missing titles.')
    
    try: data_df['Skills'] = skills_string
    except: print('Missing skills.')
    
    try: data_df['Summary'] = summary
    except: print('Missing summary.')

    try: data_df['Education'] = education_string
    except: print('Missing education.')

    try: data_df['Job Type Preference'] = jobtype_string
    except: print('Missing job type preference.')

    try: data_df['Preferred Job Title'] = prefjob_string
    except: print('Missing preferred job title.')

    # Saving dataframe
    try:
        test_linked = pd.DataFrame(data=data_df)
        test_linked.to_csv(f'../data/linkedin/test-output/{folder_name}.csv', index=False)
    except:
        print('Failed to save dataframe.')
    
    return test_linked

# Test it out here

In [6]:
df_zach = linkedin_data('Zach_LinkedInData_12-16-2020')

Job preference columns could not be created.
Missing job type preference.
Missing preferred job title.


In [7]:
df_zach['Titles'][0]

'Data Science Fellow, Python Developer, Health and Fitness Coach, IT Specialist, Data Analyst, Computational Modeling, Data Analyst'

In [8]:
df_nolan = linkedin_data('Nolan_LinkedInData_12-16-2020')

Job preference columns could not be created.
Missing job type preference.
Missing preferred job title.


In [9]:
df_nolan['Titles'][0]

'Data Science Fellow, Painter'