In [1]:
import getpass
from serpapi import GoogleSearch

In [2]:
api_key = getpass.getpass()

In [26]:
def search_google_scholar_profiles(full_name:str):
    params = {
    "api_key": api_key,
    "engine": "google_scholar_profiles",
    "hl": "en",
    "mauthors": full_name
    }

    search = GoogleSearch(params)

    print("Searching for profile of ", full_name)
    results = search.get_dict()

    try:
        for profile in results["profiles"]:
            if "Ben Gurion University" in profile["affiliations"] or "Ben-Gurion University" in profile["affiliations"]:
                print(f"\tFound a profile of {profile['name']} under Ben Gurion Univerisity")
                return profile
    except KeyError as e:
        print(f"\tDid not find any profile of {full_name}. {e}")
    
    print(f"\tDid not find the profile of {full_name} under Ben Gurion university")

    return None

def get_author_uuid(profile:dict):
    return profile["author_id"]
        

def search_google_scholar_author(author_id):
    print(f"\tSearch for author id {author_id}")
    params = {
    "api_key": api_key,
    "engine": "google_scholar_author",
    "hl": "en",
    "author_id": author_id,
    "num": "100" # The maximum number
    }

    search = GoogleSearch(params)
    results = search.get_dict()

    return results

In [18]:
import json
import os 
supervisors_names = []

with open("../../data/supervisors.jsonl","r") as f:
    for line in f:
        supervisors_names.append(json.loads(line))

supervisors_names[:10]

[{'name': 'Jihad El sana',
  'personal_website_url': ['https://www.cs.bgu.ac.il/~el-sana']},
 {'name': 'Klara Kedem',
  'personal_website_url': ['http://www.cs.bgu.ac.il/~klara']},
 {'name': 'Matthew Katz',
  'personal_website_url': ['https://www.cs.bgu.ac.il/~matya']},
 {'name': 'Meirav Zehavi',
  'personal_website_url': ['https://sites.google.com/site/zehavimeirav']},
 {'name': 'Natan Rubin',
  'personal_website_url': ['https://sites.google.com/site/natanrubinshomepage/']},
 {'name': 'Paz Carmi',
  'personal_website_url': ['https://www.cs.bgu.ac.il/~carmip']},
 {'name': 'Ohad Ben-shahar',
  'personal_website_url': ['https://www.cs.bgu.ac.il/~ben-shahar']},
 {'name': 'Oren Freifeld',
  'personal_website_url': ['https://www.cs.bgu.ac.il/~orenfr']},
 {'name': 'Andrei Sharf',
  'personal_website_url': ['https://www.cs.bgu.ac.il/~asharf']},
 {'name': 'Danny Barash',
  'personal_website_url': ['https://www.cs.bgu.ac.il/~dbarash']}]

In [23]:
save_path = "../../data/google_scholar/"

def search_supervisor(full_name,line_num):
    supervisor_path = os.path.join(save_path,f"{line_num}_{full_name}")

    if not os.path.exists(supervisor_path):
        os.makedirs(supervisor_path)
    
    profile = search_google_scholar_profiles(full_name)

    if profile is None:
        print(f"Did not find profile for {full_name}... break")
        return
    
    with open(os.path.join(supervisor_path,"profile.json"),"w") as f:
        json.dump(profile, f)
    
    author_id = get_author_uuid(profile)

    author_details = search_google_scholar_author(author_id)

    with open(os.path.join(supervisor_path,"author_details.json"),"w") as f:
        json.dump(author_details,f)

    if len(author_details["articles"]) < 100:
        print(f"\t Retreived {len(author_details["articles"])} articles")
    else:
        print("\t SerpAPI allows only 100 max articales at a time, YOU MIGHT NEED TO SCRAPE MORE")
        return 0
    
    return 1

In [None]:
# search_supervisor(supervisors_names[0]["name"],0)

Searching for profile of  Jihad El sana
	Found a profile of Jihad El-Sana under Ben Gurion Univerisity


True

In [None]:
more_scraping = []
i_offset = 1
i_end = 5

for i,supervisor_dict in enumerate(supervisors_names[i_offset:i_end]):
    global_index = i_offset+i
    print(f"global_index is {global_index}")
    
    status = search_supervisor(supervisor_dict["name"],global_index) # i+1 because of supervisors_names[i_offset:] (to remain consistent)

    if status == 0:
        more_scraping.append(supervisor_dict)

global_index is 1
Searching for profile of  Klara Kedem
	Found a profile of Klara Kedem under Ben Gurion Univerisity
	 SerpAPI allows only 100 max articales at a time, YOU MIGHT NEED TO SCRAPE MORE
global_index is 2
Searching for profile of  Matthew Katz
	Found a profile of Matthew J. Katz under Ben Gurion Univerisity
	 SerpAPI allows only 100 max articales at a time, YOU MIGHT NEED TO SCRAPE MORE
global_index is 3
Searching for profile of  Meirav Zehavi
	Found a profile of Meirav Zehavi under Ben Gurion Univerisity
	 SerpAPI allows only 100 max articales at a time, YOU MIGHT NEED TO SCRAPE MORE
global_index is 4
Searching for profile of  Natan Rubin


KeyError: 'profiles'

In [27]:
more_scraping = []
i_offset = 5
i_end = 10

for i,supervisor_dict in enumerate(supervisors_names[i_offset:i_end]):
    global_index = i_offset+i
    print(f"global_index is {global_index}")
    
    try:
        status = search_supervisor(supervisor_dict["name"],global_index) # i+1 because of supervisors_names[i_offset:] (to remain consistent)

        if status == 0:
            more_scraping.append(supervisor_dict)
    except Exception as e:
        print(f"An error occured while searching for {supervisor_dict["name"]}. {e}")

global_index is 5
Searching for profile of  Paz Carmi
	Found a profile of Paz Carmi under Ben Gurion Univerisity
	Search for author id 6YQQL84AAAAJ
	 SerpAPI allows only 100 max articales at a time, YOU MIGHT NEED TO SCRAPE MORE
global_index is 6
Searching for profile of  Ohad Ben-shahar
	Found a profile of Ohad Ben-Shahar under Ben Gurion Univerisity
	Search for author id t77PmuQAAAAJ
	 SerpAPI allows only 100 max articales at a time, YOU MIGHT NEED TO SCRAPE MORE
global_index is 7
Searching for profile of  Oren Freifeld
	Found a profile of Oren Freifeld under Ben Gurion Univerisity
	Search for author id fxzlm6IAAAAJ
	 Retreived 63 articles
global_index is 8
Searching for profile of  Andrei Sharf
	Found a profile of Andrei Sharf under Ben Gurion Univerisity
	Search for author id QMEZPXEAAAAJ
	 Retreived 83 articles
global_index is 9
Searching for profile of  Danny Barash
	Found a profile of Danny Barash under Ben Gurion Univerisity
	Search for author id PlEWwDQAAAAJ
	 SerpAPI allows o

In [28]:
more_scraping = []
i_offset = 10
i_end = 30

for i,supervisor_dict in enumerate(supervisors_names[i_offset:i_end]):
    global_index = i_offset+i
    print(f"global_index is {global_index}")
    
    try:
        status = search_supervisor(supervisor_dict["name"],global_index) # i+1 because of supervisors_names[i_offset:] (to remain consistent)

        if status == 0:
            more_scraping.append(supervisor_dict)
    except Exception as e:
        print(f"An error occured while searching for {supervisor_dict["name"]}. {e}")

global_index is 10
Searching for profile of  Eran Treister
	Found a profile of Eran Treister under Ben Gurion Univerisity
	Search for author id 5nNoFlEAAAAJ
	 Retreived 72 articles
global_index is 11
Searching for profile of  Avraham Melkman
	Found a profile of Avraham Melkman under Ben Gurion Univerisity
	Search for author id BygRsi8AAAAJ
	 Retreived 53 articles
global_index is 12
Searching for profile of  Chen Keasar
	Found a profile of Chen keasar under Ben Gurion Univerisity
	Search for author id UgWH55gAAAAJ
	 Retreived 52 articles
global_index is 13
Searching for profile of  Michal Ziv-ukelson
	Found a profile of Michal Ziv-Ukelson under Ben Gurion Univerisity
	Search for author id Fp1QSKgAAAAJ
	 Retreived 75 articles
global_index is 14
Searching for profile of  Yefim Dinitz
	Did not find any profile of Yefim Dinitz. 'profiles'
	Did not find the profile of Yefim Dinitz under Ben Gurion university
Did not find profile for Yefim Dinitz... break
global_index is 15
Searching for prof

In [29]:
more_scraping = []
i_offset = 30
i_end = 51

for i,supervisor_dict in enumerate(supervisors_names[i_offset:i_end]):
    global_index = i_offset+i
    print(f"global_index is {global_index}")
    
    try:
        status = search_supervisor(supervisor_dict["name"],global_index) # i+1 because of supervisors_names[i_offset:] (to remain consistent)

        if status == 0:
            more_scraping.append(supervisor_dict)
    except Exception as e:
        print(f"An error occured while searching for {supervisor_dict["name"]}. {e}")

global_index is 30
Searching for profile of  Aryeh Kontorovich
	Found a profile of Aryeh Kontorovich under Ben Gurion Univerisity
	Search for author id UNVQ5DsAAAAJ
	 SerpAPI allows only 100 max articales at a time, YOU MIGHT NEED TO SCRAPE MORE
global_index is 31
Searching for profile of  Daniel Berend
	Found a profile of Daniel Berend under Ben Gurion Univerisity
	Search for author id k4zZo44AAAAJ
	 SerpAPI allows only 100 max articales at a time, YOU MIGHT NEED TO SCRAPE MORE
global_index is 32
Searching for profile of  Danny Hendler
	Found a profile of Danny Hendler under Ben Gurion Univerisity
	Search for author id Vz6nlzcAAAAJ
	 SerpAPI allows only 100 max articales at a time, YOU MIGHT NEED TO SCRAPE MORE
global_index is 33
Searching for profile of  Oded Margalit
	Did not find the profile of Oded Margalit under Ben Gurion university
Did not find profile for Oded Margalit... break
global_index is 34
Searching for profile of  Ronen Brafman
	Found a profile of Ronen Brafman under B

In [30]:
more_scraping = []
i_offset = 51
i_end = 55

for i,supervisor_dict in enumerate(supervisors_names[i_offset:i_end]):
    global_index = i_offset+i
    print(f"global_index is {global_index}")
    
    try:
        status = search_supervisor(supervisor_dict["name"],global_index) # i+1 because of supervisors_names[i_offset:] (to remain consistent)

        if status == 0:
            more_scraping.append(supervisor_dict)
    except Exception as e:
        print(f"An error occured while searching for {supervisor_dict["name"]}. {e}")

global_index is 51
Searching for profile of  Ofer Neiman
	Found a profile of Ofer Neiman under Ben Gurion Univerisity
	Search for author id oApOKYYAAAAJ
	 Retreived 72 articles
global_index is 52
Searching for profile of  Uri Stemmer
	Did not find the profile of Uri Stemmer under Ben Gurion university
Did not find profile for Uri Stemmer... break
global_index is 53
Searching for profile of  Dean Doron
	Found a profile of Dean Doron under Ben Gurion Univerisity
	Search for author id IhJrO6cAAAAJ
	 Retreived 37 articles
global_index is 54
Searching for profile of  Jonathan Mosheiff
	Found a profile of Jonathan Mosheiff under Ben Gurion Univerisity
	Search for author id FvlR-H4AAAAJ
An error occured while searching for Jonathan Mosheiff. 'articles'
