# INVESTOR INTELLIGENCE AGENT - GLASSDOOR SENTIMENT ANALYSIS¶

TASK:
1. Retrieve overall rating of Company (Ovr Rating, Job Recommendation Rating, CEO Approval Rating)
2. Extraction of Top Pros and Cons Reviews
3. Search for top N competitors
4. Benchmark against Industry Standards / Competitors

In [1]:
# import rpa as t for the new vision of TagUI same function just different name
import tagui as t


In [2]:
t.close()

[RPA][ERROR] - use init() before using close()


False

In [3]:
# Get Competitors from Google
def scrape_competitors(company):
    try:
        # Get Competitors from Google
        t.init(visual_automation = True) # visual automation if keyboard automation required in subsequent code
        t.url('https://www.google.com/') # go to google website
        t.type('//*[@name="q"]', 'who are the top 5 company competitors of ' + company + '[enter]')
        t.click('//div[contains(@class,"FPdoLc")]//input[1]')  # Click the search button

        top_competitors = []

        # Loop from 1 to 3 inclusive
        for i in range(1, 4):
            # Construct XPath expressions for top competitors
            top_competitors_xpath = '//*[@jsname="ibnC6b"][' + str(i) + ']//div[@data-attrid="BreadthFirstSRP"]'

            # Read top competitors using TagUI's t.read() function
            top_competitors.append(t.read(top_competitors_xpath))

        return top_competitors

    except Exception as e:
        print(f"Error occurred: {str(e)}")
        return None

In [4]:
# Scrape from ratings (Step 2)
# This step shall take place after logging in and searched company so that rating is available from searched company
def scrape_rating():
    try:
        company_rating = t.read('//header/span[contains(@class, "employerOverviewRating")]/text()')
        # print (company_rating)
        job_recommendation = t.read('//*[contains(@data-test, "recommendToFriendRating")]//*[contains(@class,"textVal")]')
        # print (job_recommendation)
        ceo_approval = t.read('//*[contains(@data-test, "ceoRating")]//*[contains(@class,"textVal")]')
        # print (ceo_approval)     

        return company_rating, job_recommendation, ceo_approval

    except Exception as e:
        print(f"Error occurred: {str(e)}")
        return None, None, None

In [5]:
# Scrape Top Review Highlights (qualitative) (Step 3)
# This step shall take place after logging in and searched company so that Reviews Tab is available for clicking
def scrape_top_reviews():
    try:
        t.wait(2)
        t.click('//*[contains(@data-test, "nav-reviews")]') # Click on Reviews Tab
        t.wait(5)
        t.click('//button[contains(text(), "Show More Pros and Cons")]') # click to exapnd Top Reviews Highlights by Sentiment
        t.wait(2)

        # Below codes were initial code to scrape all pros and cons reviews in single variable. Superseded by following codes to store as lists instead for individual reviews.
        # pros_reviews = t.read('//*[@id="ReviewHighlightsModule"]//div[contains(text(),"Pros")]/following-sibling::ul') # read all pros reviews
        # cons_reviews = t.read('//*[@id="ReviewHighlightsModule"]//div[contains(text(),"Cons")]/following-sibling::ul') # read all cons reviews

        # Initialize lists to store  headers and descriptions
        pros_header = []
        pros_description = []
        cons_header = []
        cons_description = []

        # Loop from 1 to 5 inclusive
        for i in range(1, 6):
            # Construct XPath expressions for cons headers and descriptions
            pros_header_xpath = '//*[@id="ReviewHighlightsModule"]//div[contains(text(),"Pros")]/following-sibling::ul//li[' + str(i) + ']//a'
            pros_description_xpath = '//*[@id="ReviewHighlightsModule"]//div[contains(text(),"Pros")]/following-sibling::ul/li[' + str(i) + ']'
            cons_header_xpath = '//*[@id="ReviewHighlightsModule"]//div[contains(text(),"Cons")]/following-sibling::ul//li[' + str(i) + ']//a'
            cons_description_xpath = '//*[@id="ReviewHighlightsModule"]//div[contains(text(),"Cons")]/following-sibling::ul/li[' + str(i) + ']'

            # Read cons header and description using TagUI's t.read() function
            pros_header.append(t.read(pros_header_xpath))
            pros_description.append(t.read(pros_description_xpath))
            cons_header.append(t.read(cons_header_xpath))
            cons_description.append(t.read(cons_description_xpath))
    
        return pros_header, pros_description, cons_header, cons_description

    except Exception as e:
        print(f"Error occurred: {str(e)}")
        return None, None, None, None



In [6]:
# Scrape from Glassdoor - Ratings and Top Review Highlights and Competitors' Ratings
def glassdoor_scrape(company):
    try:
        # Step 0 - search via google for top competitors
        top_competitors = scrape_competitors(company) 
#         print(top_competitors)

        # Step 1 - access glassdoor and search company
#         t.init(visual_automation = True) # visual automation if keyboard automation required in subsequent code
        t.url('https://www.glassdoor.com/') # go to glassdoor website
        t.wait(10)
        t.type('//*[@id="inlineUserEmail"]',  email +'[enter]') # login with specially created email address for glassdoor access
        t.wait(10)
        t.type('//*[@id="inlineUserPassword"]',  email_pw +'[enter]') # login with specially created email address for glassdoor access
        t.wait(10)
        t.click('//*[@id="ContentNav"]//a[contains(text(),"Companies")]') # click on Companies tab to search for companies
        t.wait(10)
        t.type('//*[contains(@placeholder,"Search for a Company")]',  company +'[enter]') # search for company
        t.click('//*[contains(@data-test,"company-search-button")]') # click on search because "enter" does not work
        t.wait(10)
        t.click('//*[@data-serp-pos="0"]//h2/a') # click on top return result

        # Step 2 - scrape rating of company
        company_rating, job_recommendation, ceo_approval = scrape_rating()
#         print(company_rating)
#         print(job_recommendation)
#         print(ceo_approval)

        # Step 3 - Scrape Top Review Highlights
        
        pros_header, pros_description, cons_header, cons_description = scrape_top_reviews()
#         print(pros_header)
#         print(pros_description)
#         print(cons_header)
#         print(cons_description)

        
        # Step 4 - scrape ratings of top competitors
        # Initialize lists to store ratings, job recommendations, and CEO approvals
        comp_rating = []
        comp_job_rec = []
        comp_ceo_approval = []

        # Loop from 0 to n exclusive
        n = len(top_competitors)
        for i in range(0, n):
            t.wait(2) # wait to simulate person clicking
            t.click('//*[contains(@data-test,"search-button")]') # click on search button
            t.wait(2) # wait to simulate person clicking
            t.click('//*[contains(@data-test,"clear-button")]') # click on clear button
            t.wait(2) # wait to simulate person clicking
            t.type('//*[contains(@data-test,"search-label")]', top_competitors[i] + '[enter]')
            t.wait(2) # wait to simulate person clicking
            t.click('//*[@id="SearchKeywordDefaultResults"]/li[1]') # click on first returned result button
            t.wait(2) # wait to simulate person clicking

            # Scrape rating, job recommendation, and CEO approval and append to respective lists
            rating, job_rec, ceo_approval = scrape_rating()
            comp_rating.append(rating)
            comp_job_rec.append(job_rec)
            comp_ceo_approval.append(ceo_approval)

#         print (comp_rating)
#         print (comp_job_rec)
#         print (comp_ceo_approval)

        t.click('//*[contains(@data-test, "utility-nav-dropdown")]/button[@aria-label="profile"]')
        t.click('//*[@id="UtilityNav"]/div[3]/div/div/ul[1]/li[6]/a[@data-test= "sign-out"]')
        t.close()

        return (
            company_rating, job_recommendation, ceo_approval,
            pros_header, pros_description, cons_header, cons_description,
            comp_rating, comp_job_rec, comp_ceo_approval, top_competitors
        )

    except Exception as e:
        print(f"Error occurred: {str(e)}")
        return None, None, None, None, None, None, None, None, None, None, None

In [7]:
# Example Usage:
email = "i2a.isa.iss@gmail.com" # for glassdoor access
email_pw = "i2aproject" # for glassdoor access
company = 'apple' # specify company of interest

(company_rating, job_recommendation, ceo_approval, pros_header, pros_description, cons_header, cons_description, comp_rating, comp_job_rec, comp_ceo_approval, top_competitors) = glassdoor_scrape(company) 



[RPA][ERROR] - cannot find //header/span[contains(@class, "employerOverviewRating")]/text()


In [22]:
# Comparing against Competitors' Ratings
company_rating_rank = 1
company_job_rec_rank = 1
company_ceo_app_rank = 1
n = len(top_competitors)

for i in range(0, n):
    if company_rating < comp_rating [i]:
        company_rating_rank += 1
    if job_recommendation < comp_job_rec [i]:
        company_job_rec_rank += 1
    if ceo_approval < comp_ceo_approval [i]:
        company_ceo_app_rank += 1

# Output Ranking:
print(f"{n} number of competitors assessed are: {top_competitors}")
print(f"{company}'s ratings ranks {company_rating_rank} out of {n+1}")
print(f"{company}'s job recommendation rating ranks {company_job_rec_rank} out of {n+1}")
print(f"{company}'s ceo approval rating ranks {company_ceo_app_rank} out of {n+1}")

3 number of competitors assessed are: ['Microsoft', 'Samsung', 'Google']
apple's ratings ranks 2 out of 4
apple's job recommendation rating ranks 3 out of 4
apple's ceo approval rating ranks 2 out of 4


In [8]:
company_rating

'4.2'

In [9]:
job_recommendation

'80'

In [10]:
ceo_approval

'78'

In [11]:
pros_header

['Great benefits',
 'great and the people',
 'pay is good',
 'Great culture',
 'team is great']

In [12]:
pros_description

['"Great benefits and a nice feeling to be apart of a the worlds most valuable brand."\xa0(in 4986 reviews)',
 '"Working from home is great and the people you work with and meet are the best."\xa0(in 2542 reviews)',
 '"pay is good but expect to work a lot if you want to clime the latter up"\xa0(in 2247 reviews)',
 '"Great culture and ability to work your way up if you are motivated or interested in a career at Apple."\xa0(in 1042 reviews)',
 '"The team is great and you bound with peers that make your days easier and manage the overall stress"\xa0(in 721 reviews)']

In [13]:
cons_header

['No work life balance',
 'Poor management',
 'Long hours',
 'bad managers',
 'time off']

In [14]:
cons_description

['"No work life balance (made you feel bad about taking time off for mental health reasons)"\xa0(in 1408 reviews)',
 '"Poor management and tolls"\xa0(in 1269 reviews)',
 '"Long hours and have to take calls back to back Didn\'t enjoy the job at all."\xa0(in 838 reviews)',
 '"But know that there is 0 accountability for bad managers and so there are toxic teams that go undetected."\xa0(in 832 reviews)',
 '"They are very hard on you about taking time off and want you to always be at everything."\xa0(in 240 reviews)']

In [15]:
comp_rating

['', '3.8', '4.4']

In [16]:
comp_job_rec

['86', '67', '85']

In [17]:
comp_ceo_approval

['91', '72', '78']

In [18]:
top_competitors

['Microsoft', 'Samsung', 'Google']