# Founder Rank
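This notebook runs the founder-ranking workflow end to end: collect LinkedIn profiles, transform them into feature vectors, and rank them with a trained model.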

In [1]:

import numpy as np
import json
import sys
import pandas as pd
import os
from dotenv import load_dotenv

sys.path.append("..")

from src.config.config import cfg
from src.clients.perplexity_client import PerplexityClient
from src.clients.proxycurl_client import ProxycurlClient
from src.data.profile_transforms import ProfileTransforms
from src.core.ranking import search_founders, rank_profiles, load_model
from src.utils.profile_utils import get_queried_urls


# Load variables from a local .env file into the process environment
# (python-dotenv). Presumably the API clients below read their keys from there.
load_dotenv()

# Keep printed NumPy arrays compact: two decimals, no scientific notation for
# small values, and rows up to 120 characters wide.
np.set_printoptions(
    linewidth=120,
    suppress=True,
    precision=2,
)

### Search 

We can either provide a list of LinkedIn URLs (`SEARCH = False`) or let the script search for founders (`SEARCH = True`).

In [2]:
# API clients. `px` is used below to fetch LinkedIn profiles; `pc` is only
# referenced by the (currently commented-out) process_profiles call.
px = ProxycurlClient()
pc = PerplexityClient()

# Passed as `limit` to search_founders when SEARCH is True.
N = 7

# True: discover founders via search_founders.
# False: use the cached profile file plus the explicit URL list below.
SEARCH = False

# Profiles to rank when SEARCH is False. URLs already present in the cached
# profile file are skipped rather than fetched again.
linkedin_urls = [
    "https://linkedin.com/in/george-goodfellow/",
    "https://linkedin.com/in/adithyagurunathan/",
    "https://linkedin.com/in/sarangpujari/",
    "https://linkedin.com/in/katelam8/",
    "https://linkedin.com/in/christopher-hur/",
    "https://linkedin.com/in/aliciajsteele/",
    "https://linkedin.com/in/charlesfatunbi/",
    "https://linkedin.com/in/tejal-dahake/",
    "https://linkedin.com/in/rohan-devraj/",
    "https://linkedin.com/in/skareer/",
    "https://linkedin.com/in/imgeorgiev/",
    "https://linkedin.com/in/viresh-pati/",
]

# Batch name: selects ../data/proxycurl/{LIST_NAME}.json (raw profiles) and
# the {LIST_NAME}-profiles.csv encodings file read later in the notebook.
LIST_NAME = 'angel-network'
# Path handed to load_model in the ranking step.
MODEL_PATH = '../models/founder_rank_with_ranking_loss.pkl'

In [3]:
# Build `data`, the list of raw profile records used by the rest of the notebook.
#   SEARCH=True  -> ask search_founders for up to N results.
#   SEARCH=False -> start from the cached JSON for LIST_NAME and fetch only the
#                   URLs in linkedin_urls that are not already cached.
if SEARCH:
    data = search_founders(px=px, limit=N)
else:
    cache_path = f"../data/proxycurl/{LIST_NAME}.json"
    try:
        # JSON is UTF-8 by specification; do not depend on the platform default encoding.
        with open(cache_path, "r", encoding="utf-8") as json_file:
            data = json.load(json_file)
    except FileNotFoundError:
        # No cache yet for this LIST_NAME (first run): start empty and fetch every URL.
        data = []
    processed = get_queried_urls(data)

    print(f'Found {len(processed)} profiles in specified dir')
    for url in linkedin_urls:
        if url in processed:
            print(f'already processed {url} ... skipping')
            continue
        print(f"Fetching profile: {url}")
        profile = px.fetch_linkedin_profile(url, use_cache="if-recent")
        if profile:
            data.append({"profile": profile})
        else:
            # Make dropped profiles visible instead of silently skipping them.
            print(f"No profile returned for {url}")




Found 12 profiles in specified dir
already processed https://linkedin.com/in/george-goodfellow/ ... skipping
already processed https://linkedin.com/in/adithyagurunathan/ ... skipping
already processed https://linkedin.com/in/sarangpujari/ ... skipping
already processed https://linkedin.com/in/katelam8/ ... skipping
already processed https://linkedin.com/in/christopher-hur/ ... skipping
already processed https://linkedin.com/in/aliciajsteele/ ... skipping
already processed https://linkedin.com/in/charlesfatunbi/ ... skipping
already processed https://linkedin.com/in/tejal-dahake/ ... skipping
already processed https://linkedin.com/in/rohan-devraj/ ... skipping
already processed https://linkedin.com/in/skareer/ ... skipping
already processed https://linkedin.com/in/imgeorgiev/ ... skipping
already processed https://linkedin.com/in/viresh-pati/ ... skipping


In [4]:
# Uncomment to write `data` to the profile cache file for this LIST_NAME.
# with open(f"../data/proxycurl/{LIST_NAME}.json", "w") as f:
#     json.dump(data, f, indent=2)

In [5]:
# Display the collected profile records for a quick visual check.
data

[{'profile': {'public_identifier': 'george-goodfellow',
   'profile_pic_url': 'https://media.licdn.com/dms/image/v2/D5603AQH8qqwb2vupLQ/profile-displayphoto-shrink_800_800/profile-displayphoto-shrink_800_800/0/1684288401420?e=1747267200&v=beta&t=8Y-Y0Qq2osYqorc4FBE6JFOgDTAtRNgqiB-tyTXmcZ4',
   'background_cover_image_url': 'https://media.licdn.com/dms/image/v2/D5616AQGCDyYOxngABQ/profile-displaybackgroundimage-shrink_350_1400/profile-displaybackgroundimage-shrink_350_1400/0/1721653049636?e=1747267200&v=beta&t=EBURtMk69Laf1HnnqWrntXbM8o4DUxu-2cPkxQIwg64',
   'first_name': 'George',
   'last_name': 'Goodfellow',
   'full_name': 'George Goodfellow',
   'follower_count': 5064,
   'occupation': 'Co-Founder at GT Angel Network',
   'headline': 'Building custom API integrations instantly @ Versori | Co-Founder, The Georgia Tech Angel Network',
   'summary': None,
   'country': 'US',
   'country_full_name': 'United States',
   'city': 'Atlanta',
   'state': 'Georgia',
   'experiences': [{'star

### Transforming Profiles

In [6]:
# %%timeit
def _parse_feature_vector(text):
    """Turn a space-separated, bracketed string such as "[1. 2. 3.]" into a 1-D float array."""
    return np.fromstring(text.strip("[]"), sep=' ')


T = ProfileTransforms(data)
DF_DIR = '../data/sample_encodings/'

# Recompute the encodings from scratch (the call receives the Perplexity client):
# df = T.process_profiles(profiles=data, perplexity_client=pc, output_dir=DF_DIR,batch_code=LIST_NAME)

# Load the saved encodings for this batch -- presumably written by an earlier
# process_profiles run; TODO confirm the file naming against that function.
encodings_csv = f'{DF_DIR}{LIST_NAME}-profiles.csv'
df = pd.read_csv(encodings_csv, index_col=False)

# The CSV stores each feature vector as text, so rebuild the arrays.
df['feature_vector'] = df['feature_vector'].apply(_parse_feature_vector)

# Hand the loaded frame to the transformer so T.get_feature_matrix() can be
# called in the ranking step.
T.df = df


### Ranking

In [7]:

# Rank the profiles with the saved model and show the key columns, highest score first.
# NOTE(review): MODEL_PATH points at a .pkl file; if load_model unpickles it,
# only load model files from a trusted source.
feature_matrix = T.get_feature_matrix()
model = load_model(model_path=MODEL_PATH)
ranked_results = rank_profiles(df, feature_matrix, model_dict=model)

summary_columns = [
    'Name', 'Linkedin',
    'UNDERGRAD', 'GRADUATE', 'COMPANY', 'SENIORITY',
    'EXPERTISE', 'EXIT', 'FOUNDER', 'STARTUP',
    'score',
]
display(ranked_results[summary_columns].sort_values(by="score", ascending=False))

# Uncomment to save the ranking as the next numbered founders-<k>.csv in ../out:
# ranked_results.to_csv(f'../out/founders-{len([f for f in os.listdir("../out") if f.startswith("founders-")]) + 1}.csv', index=False)

Unnamed: 0,Name,Linkedin,UNDERGRAD,GRADUATE,COMPANY,SENIORITY,EXPERTISE,EXIT,FOUNDER,STARTUP,score
0,George Goodfellow,https://www.linkedin.com/in/george-goodfellow,2,0,1,2,3,0,2,3,0.547202
1,Sarang Pujari,https://www.linkedin.com/in/sarangpujari,2,1,1,2,3,0,1,2,0.469755
2,Ignat Georgiev🤖,https://www.linkedin.com/in/imgeorgiev,1,1,2,2,3,0,1,3,0.446629
3,Adithya G.,https://www.linkedin.com/in/adithyagurunathan,2,0,2,1,3,0,1,3,0.441652
4,Tejal Dahake,https://www.linkedin.com/in/tejal-dahake,2,0,1,2,3,0,1,2,0.412835
5,Simar Kareer,https://www.linkedin.com/in/skareer,2,1,1,1,3,0,1,2,0.354172
6,Christopher Hur,https://www.linkedin.com/in/christopher-hur,2,0,1,1,1,0,1,2,0.325118
7,Katelyn L.,https://www.linkedin.com/in/katelam8,2,0,1,2,1,0,1,2,0.306268
8,Viresh Pati,https://www.linkedin.com/in/viresh-pati,2,1,1,1,3,0,1,1,0.305756
9,"Charles Fatunbi, MBA",https://www.linkedin.com/in/charlesfatunbi,1,1,1,2,3,0,1,2,0.292106
