# LinkedIn Connection Visualization

## Importing libraries and our data

In [1]:
# import libraries
import pandas as pd
import plotly.express as px
import numpy as np

In [2]:
# Mount Google Drive so files stored there are visible to this Colab runtime.
# NOTE(review): the alias `gc` is also the name of Python's standard-library
# garbage-collector module; here it refers to `google.colab`.
import google.colab as gc
gc.drive.mount('/content/drive')

# Change the working directory to the Drive folder holding the dataset, so the
# relative file name passed to `read_csv` below resolves against it.
%cd '/content/drive/My Drive/datasets'

# Load the connections CSV into the DataFrame used by the rest of the notebook.
df = pd.read_csv('myconnections.csv')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/datasets


## Let's look into our data

Our data consists of 450 rows and 6 columns. Some columns have missing values, which we will handle later.

In [3]:
# Preview the first five rows of the dataset
df.head(n=5)

Unnamed: 0,First Name,Last Name,Email Address,Company,Position,Connected On
0,Irfan,Delmar,,LAZNAS IZI,Information Technology Staff,20 May 2021
1,Muhammad Irfan,Fadhlurrahman,,PT. Rekayasa Industri,Mechanical Engineer,20 May 2021
2,Niluh Cantia,Paradita Dealuma,,Himpunan Mahasiswa Fisika ITB (HIMAFI ITB),Career Development Staff,20 May 2021
3,Dilan,Kusuma,,PT. Bank Syaraiah Bukopin,Compliance Officer,20 May 2021
4,Melisa,Damayanti,,Institut Teknologi Bandung,Assistant Lab Basic Physics ITB,19 May 2021


In [4]:
# Summarise the DataFrame: number of entries, non-null count and dtype of
# each column, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 450 entries, 0 to 449
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   First Name     450 non-null    object
 1   Last Name      428 non-null    object
 2   Email Address  8 non-null      object
 3   Company        393 non-null    object
 4   Position       393 non-null    object
 5   Connected On   428 non-null    object
dtypes: object(6)
memory usage: 21.2+ KB


## Handling our data

### Drop the missing values and the email column

In [5]:
# Remove the e-mail column, then discard every row that has no Company or
# no Position, in a single method chain.
df = (
    df
    .drop(columns=['Email Address'])
    .dropna(subset=['Company', 'Position'])
)

### About synonyms...

Take "Laboratory Assistant" as an example: the same position appears under many different names. We have to deal with this, but only for a handful of positions/companies, because I'm doing it manually. He he ...

In [6]:
# Show the first rows whose Position mentions "Laboratory Assistant"
df.loc[lambda frame: frame['Position'].str.contains("Laboratory Assistant")].head()

Unnamed: 0,First Name,Last Name,Company,Position,Connected On
86,M. Naufal Najib,Sanjaya,Institut Teknologi Bandung,Laboratory Assistant of Measurement and Analys...,31 Jan 2021
114,Hasni,Hasni,Bandung Institute of Technology,Basic Biochemistry Laboratory Assistant,21 Nov 2020
116,Karina Ayu,Tri Fadhilah,Universitas Padjadjaran,Laboratory Assistant of Cake & Bakery Processing,16 Nov 2020
120,Ahmad Fauzan,Bagaskoro,Institut Teknologi Bandung (ITB),Laboratory Assistant,09 Nov 2020
141,Achmad Zacky,Fairuza,Institut Teknologi Bandung (ITB),Laboratory Assistant,09 Oct 2020


In [7]:
# Collapse synonymous Position / Company values into one canonical label each.
#
# Each rule is (keywords, canonical label). The keywords are joined with "|"
# and passed to `str.contains`, so a row matches when its value contains ANY
# of them; every matching row is overwritten with the canonical label.
#
# Things to keep in mind when editing the tables:
#   * Matching is a case-sensitive substring search ("Lab" also matches
#     "Laboratory").
#   * Rules run top to bottom and each one sees the values already rewritten
#     by the rules above it, so the order is significant.
POSITION_SYNONYMS = [
    (["Research Assistant"], "Research Assistant"),
    (["Lab"], "Laboratory Assistant"),
    (["Grader", "Lecturer Assistant", "Teaching Assistant"], "Teaching Assistant"),
    (["Data Science", "Scientist"], "Data Scientist"),
    (["Software Engineer"], "Software Engineer"),
    (["Business Development"], "Business Development"),
    (["Tutor", "Tentor"], "Tutor"),
    (["Data Analyst"], "Data Analyst"),
    (["Guru", "Teacher"], "Teacher"),
]

COMPANY_SYNONYMS = [
    (["Teknologi Bandung", "Bandung Institute of Technology"], "Institut Teknologi Bandung"),
]

# Positions are normalised first, then companies.
for column, rules in (("Position", POSITION_SYNONYMS), ("Company", COMPANY_SYNONYMS)):
    for keywords, canonical in rules:
        matches = df[column].str.contains("|".join(keywords))
        df.loc[matches, column] = canonical

## Let's visualize my connections

In [8]:
# Add a constant column that serves as the single root level of the treemaps
ROOT_LABEL = 'Ikhwanuls Network'
df[ROOT_LABEL] = ROOT_LABEL

### Company centric

In [9]:
# Treemap hierarchy: root -> company -> position
company_first = ['Ikhwanuls Network', 'Company', 'Position']
px.treemap(df, path=company_first, width=1200, height=1200)

### Position centric

In [10]:
# Treemap hierarchy: root -> position -> company
position_first = ['Ikhwanuls Network', 'Position', 'Company']
px.treemap(df, path=position_first, width=1200, height=1200)