# Merging Glassdoor Files

In [2]:
import pandas as pd
import numpy as np

In [120]:
# Raw cell text -> numeric rating. The four 'css-*' tokens stand for the
# half-step values; the digit strings are whole-number ratings stored as text.
_RATING_LOOKUP = {
    'css-11rm5hs': 3.5,
    'css-1areqgb': 4.5,
    'css-z2gtf': 2.5,
    'css-qt3l8j': 1.5,
    '1': 1,
    '2': 2,
    '3': 3,
    '4': 4,
    '5': 5,
}


def label_rating(row, column):
    """Convert one raw rating cell into its numeric value.

    Rating columns are read in as text, so they have to be coerced to
    numbers using the updated schema.

    Args:
        row: Any mapping-like object indexable by ``column`` (a DataFrame row
            when used through ``DataFrame.apply(..., axis=1)``).
        column: Name of the rating column to read from ``row``.

    Returns:
        The mapped number when the cell holds one of the known tokens;
        otherwise the cell value itself, unchanged.

    Raises:
        KeyError: Propagated from ``row[column]`` when the column is absent.
    """
    raw_value = row[column]
    # Unknown tokens (including NaN / already-numeric cells) pass straight through.
    return _RATING_LOOKUP.get(raw_value, raw_value)

# Column layout of the combined master file; any extra columns found in a
# company CSV are kept and placed after these.
_MASTER_COLUMNS = [
    'Glassdoor Review Page', 'supposed_n_reviews', 'page', 'rating_overall',
    'employment_status', 'review_title', 'review_link', 'review_date',
    'employee_job', 'employee_loc', 'review_pros', 'review_cons',
    'review_advice_mgmt', 'flag_incomplete', 'rating_components.Work/Life Balance',
    'rating_components.Culture & Values', 'rating_components.Diversity & Inclusion',
    'rating_components.Career Opportunities',
    'rating_components.Compensation and Benefits',
    'rating_components.Senior Management', 'summary_views.Recommend',
    'summary_views.CEO Approval', 'summary_views.Business Outlook',
]

# Sub-rating columns whose text values are coerced to numbers.
_RATING_COLUMNS = [
    'rating_components.Work/Life Balance', 'rating_components.Career Opportunities',
    'rating_components.Compensation and Benefits', 'rating_components.Senior Management',
    'rating_components.Culture & Values', 'rating_components.Diversity & Inclusion',
]


def _assemble_master_frame(frames, base_columns):
    """Stack the per-company frames once, with ``base_columns`` first."""
    if not frames:
        return pd.DataFrame(columns=base_columns)
    stacked = pd.concat(frames, sort=False)
    extras = [col for col in stacked.columns if col not in base_columns]
    # Base columns that no file supplied come out as all-NaN columns.
    return stacked.reindex(columns=list(base_columns) + extras)


def combine_files(companies_to_run, run_number, missing_companies = None, max_rows = 900000):
    """Concatenate per-company review CSVs into one master CSV.

    Companies are processed in order. For each one, the CSV at
    ``./data/companies/<slug>.csv`` is loaded, the six sub-rating columns are
    coerced to numbers with ``label_rating`` (or created as NaN when the file
    does not have them), and the rows are added to the master data. Processing
    stops early once the accumulated row count exceeds ``max_rows``. The master
    data is written to ``data/glassdoor_companies_<run_number>.csv`` when the
    cap is hit or when the end of ``companies_to_run`` is reached.

    Args:
        companies_to_run: List of Glassdoor review-page URLs. The file slug is
            taken as ``company[34:-4]``, which assumes the
            ``https://www.glassdoor.com/Reviews/`` prefix and a ``.htm`` suffix.
        run_number: Suffix used in the output file name.
        missing_companies: Optional list that URLs without a CSV are appended
            to (in place). A new list is created when omitted.
        max_rows: Row count above which the run stops and writes its output.

    Returns:
        ``(missing_companies, companies_to_run_left)`` where the second item
        is the tail of ``companies_to_run`` that was not processed.
    """
    # A literal [] default would be created once and shared by every call,
    # so entries from one run would leak into unrelated later runs.
    if missing_companies is None:
        missing_companies = []

    output_path = "data/glassdoor_companies_" + str(run_number) + ".csv"

    frames = []          # per-company frames, concatenated once at the end
    total_rows = 0       # running row count, replaces len(master_data)
    processed = 0        # companies consumed, including ones with no file
    cap_reached = False

    for company in companies_to_run:
        processed += 1
        slug = company[34:-4]
        file_path = "./data/companies/" + slug + ".csv"
        print(f"Adding {slug} to Master File")
        try:
            new_data = pd.read_csv(file_path)
        except FileNotFoundError:
            print(f"File Not Found: {slug}")
            missing_companies.append(company)
            continue

        print(f"{slug} has {len(new_data)} Reviews")

        for rating in _RATING_COLUMNS:
            if rating not in new_data.columns:
                # The scrape did not capture this component for the company.
                new_data[rating] = np.nan
            elif len(new_data):
                # Skipped for zero-row files: there is nothing to convert and a
                # row-wise apply on an empty frame does not return a column.
                new_data[rating] = new_data.apply(lambda row: label_rating(row, rating), axis=1)

        frames.append(new_data)
        total_rows += len(new_data)
        print(f"Total Reviews: {total_rows}")
        if total_rows > max_rows:
            cap_reached = True
            break

    companies_to_run_left = companies_to_run[processed:]

    # Write exactly once, whether the cap stopped the run or the list ran out.
    if cap_reached or not companies_to_run_left:
        if cap_reached:
            print('Max Rows Hit, Outputting File to ' + output_path)
        else:
            print('Reached the End of the Companies, Outputting File to ' + output_path)
        _assemble_master_frame(frames, _MASTER_COLUMNS).to_csv(output_path, sep = ",")

    return missing_companies, companies_to_run_left

In [115]:
# Two lists of Glassdoor review-page URLs, both taken from the
# 'Glassdoor Review Page' column of their respective CSVs.
supposed_james_companies = pd.read_csv("./data/james_summary.csv")['Glassdoor Review Page'].to_list()
mannor_companies = pd.read_csv("./scraped/sp1500_nreviews_RA.csv")['Glassdoor Review Page'].to_list()

# Build the membership set once; testing `in` against the list itself would
# rescan it for every company in both comprehensions below.
james_company_lookup = set(supposed_james_companies)

companies_to_run = [company for company in mannor_companies if company not in james_company_lookup]
# Use companies_to_run as our list of companies to add together.
companies_already_ran = [company for company in mannor_companies if company in james_company_lookup]

# First pass: stops at the row cap and hands back whatever was not processed.
missing_companies_2, companies_to_run_2 = combine_files(companies_to_run, run_number = 1)

Adding Spectrum-Reviews-E1500006 to Master File
Spectrum-Reviews-E1500006 has 11991 Reviews
Total Reviews: 11991
Adding Dollar-General-Reviews-E1342 to Master File
Dollar-General-Reviews-E1342 has 11527 Reviews
Total Reviews: 23518
Adding J-C-Penney-Reviews-E361 to Master File
J-C-Penney-Reviews-E361 has 11431 Reviews
Total Reviews: 34949
Adding Lockheed-Martin-Reviews-E404 to Master File
Lockheed-Martin-Reviews-E404 has 6730 Reviews
Total Reviews: 41679
Adding Procter-and-Gamble-Reviews-E544 to Master File
Procter-and-Gamble-Reviews-E544 has 10071 Reviews
Total Reviews: 51750
Adding NIKE-Reviews-E1699 to Master File
NIKE-Reviews-E1699 has 9765 Reviews
Total Reviews: 61515
Adding Ulta-Beauty-Reviews-E9466 to Master File
Ulta-Beauty-Reviews-E9466 has 9629 Reviews
Total Reviews: 71144
Adding Wendy-s-Reviews-E728 to Master File
Wendy-s-Reviews-E728 has 9440 Reviews
Total Reviews: 80584
Adding BNY-Mellon-Reviews-E78 to Master File
BNY-Mellon-Reviews-E78 has 9060 Reviews
Total Reviews: 8964

Total Reviews: 465845
Adding Cummins-Reviews-E187 to Master File
Cummins-Reviews-E187 has 5140 Reviews
Total Reviews: 470985
Adding MetLife-Reviews-E2899 to Master File
MetLife-Reviews-E2899 has 5079 Reviews
Total Reviews: 476064
Adding Yahoo-Reviews-E5807 to Master File
Yahoo-Reviews-E5807 has 4904 Reviews
Total Reviews: 480968
Adding Cintas-Reviews-E1256 to Master File
Cintas-Reviews-E1256 has 4919 Reviews
Total Reviews: 485887
Adding Halliburton-Reviews-E307 to Master File
Halliburton-Reviews-E307 has 4882 Reviews
Total Reviews: 490769
Adding McKesson-Reviews-E434 to Master File
McKesson-Reviews-E434 has 4778 Reviews
Total Reviews: 495547
Adding EXL-Service-Reviews-E37901 to Master File
EXL-Service-Reviews-E37901 has 4719 Reviews
Total Reviews: 500266
Adding Urban-Outfitters-Reviews-E2372 to Master File
Urban-Outfitters-Reviews-E2372 has 4696 Reviews
Total Reviews: 504962
Adding Kraft-Heinz-Company-Reviews-E1026712 to Master File
Kraft-Heinz-Company-Reviews-E1026712 has 4665 Reviews

Total Reviews: 732939
Adding Bristol-Myers-Squibb-Reviews-E107 to Master File
Bristol-Myers-Squibb-Reviews-E107 has 2969 Reviews
Total Reviews: 735908
Adding Regions-Financial-Reviews-E1397 to Master File
Regions-Financial-Reviews-E1397 has 2918 Reviews
Total Reviews: 738826
Adding Baxter-Reviews-E83 to Master File
Baxter-Reviews-E83 has 2921 Reviews
Total Reviews: 741747
Adding Electronic-Arts-Reviews-E1628 to Master File
Electronic-Arts-Reviews-E1628 has 2844 Reviews
Total Reviews: 744591
Adding Est%C3%A9e-Lauder-Companies-Reviews-E2785 to Master File
Est%C3%A9e-Lauder-Companies-Reviews-E2785 has 2838 Reviews
Total Reviews: 747429
Adding Gallagher-Reviews-E57 to Master File
Gallagher-Reviews-E57 has 1660 Reviews
Total Reviews: 749089
Adding Synopsys-Reviews-E2143 to Master File
Synopsys-Reviews-E2143 has 2600 Reviews
Total Reviews: 751689
Adding The-Children-s-Place-Reviews-E7028 to Master File
The-Children-s-Place-Reviews-E7028 has 2740 Reviews
Total Reviews: 754429
Adding CDK-Globa

Total Reviews: 889330
Adding Avon-Reviews-E67 to Master File
Avon-Reviews-E67 has 1480 Reviews
Total Reviews: 890810
Adding Guess-Reviews-E3209 to Master File
Guess-Reviews-E3209 has 1934 Reviews
Total Reviews: 892744
Adding National-Instruments-Reviews-E4030 to Master File
National-Instruments-Reviews-E4030 has 1917 Reviews
Total Reviews: 894661
Adding Finishline-Reviews-E2214 to Master File
Finishline-Reviews-E2214 has 1913 Reviews
Total Reviews: 896574
Adding Agilent-Technologies-Reviews-E9711 to Master File
Agilent-Technologies-Reviews-E9711 has 1914 Reviews
Total Reviews: 898488
Adding Steak-n-Shake-Reviews-E1296 to Master File
Steak-n-Shake-Reviews-E1296 has 1901 Reviews
Total Reviews: 900389
Max Rows Hit, Outputting File to data/glassdoor_companies_1.csv


In [121]:
# Second pass: continue with the companies left unprocessed by run 1 and keep
# appending to the same missing-file list, writing output file number 2.
missing_companies_3, companies_to_run_3 = combine_files(companies_to_run_2, run_number = 2,
                                                      missing_companies = missing_companies_2)

Adding Chico-s-FAS-Reviews-E2296 to Master File
Chico-s-FAS-Reviews-E2296 has 1902 Reviews
Total Reviews: 1902
Adding Molina-Healthcare-Reviews-E15828 to Master File
Molina-Healthcare-Reviews-E15828 has 1883 Reviews
Total Reviews: 3785
Adding Signet-Jewelers-Reviews-E5758 to Master File
Signet-Jewelers-Reviews-E5758 has 1857 Reviews
Total Reviews: 5642
Adding Cadence-Design-Systems-Reviews-E1217 to Master File
Cadence-Design-Systems-Reviews-E1217 has 1882 Reviews
Total Reviews: 7524
Adding Motorola-Solutions-Reviews-E427189 to Master File
Motorola-Solutions-Reviews-E427189 has 1139 Reviews
Total Reviews: 8663
Adding The-Buckle-Reviews-E2209 to Master File
The-Buckle-Reviews-E2209 has 1842 Reviews
Total Reviews: 10505
Adding Aaron-s-Reviews-E1083 to Master File
Aaron-s-Reviews-E1083 has 1800 Reviews
Total Reviews: 12305
Adding Fossil-Group-Reviews-E2319 to Master File
Fossil-Group-Reviews-E2319 has 1822 Reviews
Total Reviews: 14127
Adding Iron-Mountain-Reviews-E6774 to Master File
Iron-

Total Reviews: 112604
Adding ManTech-International-Corporation-Reviews-E13880 to Master File
ManTech-International-Corporation-Reviews-E13880 has 1431 Reviews
Total Reviews: 114035
Adding LHC-Group-Reviews-E37879 to Master File
LHC-Group-Reviews-E37879 has 1439 Reviews
Total Reviews: 115474
Adding Tech-Data-Reviews-E1944 to Master File
Tech-Data-Reviews-E1944 has 1404 Reviews
Total Reviews: 116878
Adding FTI-Consulting-Reviews-E6069 to Master File
FTI-Consulting-Reviews-E6069 has 1404 Reviews
Total Reviews: 118282
Adding Krispy-Kreme-Reviews-E6829 to Master File
Krispy-Kreme-Reviews-E6829 has 1420 Reviews
Total Reviews: 119702
Adding Unum-Reviews-E9522 to Master File
Unum-Reviews-E9522 has 1404 Reviews
Total Reviews: 121106
Adding Gilead-Sciences-Reviews-E2016 to Master File
Gilead-Sciences-Reviews-E2016 has 1391 Reviews
Total Reviews: 122497
Adding CoreLogic-Reviews-E30994 to Master File
CoreLogic-Reviews-E30994 has 1383 Reviews
Total Reviews: 123880
Adding Reynolds-American-Reviews-E

Total Reviews: 199519
Adding United-States-Steel-Reviews-E1251 to Master File
United-States-Steel-Reviews-E1251 has 1077 Reviews
Total Reviews: 200596
Adding L-Brands-Reviews-E656 to Master File
L-Brands-Reviews-E656 has 1057 Reviews
Total Reviews: 201653
Adding Align-Technology-Reviews-E12898 to Master File
Align-Technology-Reviews-E12898 has 1087 Reviews
Total Reviews: 202740
Adding Select-Medical-Reviews-E11368 to Master File
Select-Medical-Reviews-E11368 has 1075 Reviews
Total Reviews: 203815
Adding Campbell-Soup-Company-Reviews-E129 to Master File
Campbell-Soup-Company-Reviews-E129 has 1050 Reviews
Total Reviews: 204865
Adding Hibbett-Sports-Reviews-E6133 to Master File
Hibbett-Sports-Reviews-E6133 has 1060 Reviews
Total Reviews: 205925
Adding Ultimate-Medical-Academy-Reviews-E370934 to Master File
Ultimate-Medical-Academy-Reviews-E370934 has 1048 Reviews
Total Reviews: 206973
Adding Zebra-Technologies-Reviews-E2089 to Master File
Zebra-Technologies-Reviews-E2089 has 1041 Reviews


Total Reviews: 265152
Adding SEI-Investments-Reviews-E1851 to Master File
SEI-Investments-Reviews-E1851 has 812 Reviews
Total Reviews: 265964
Adding John-Hancock-Reviews-E9835 to Master File
John-Hancock-Reviews-E9835 has 821 Reviews
Total Reviews: 266785
Adding La-Z-Boy-Reviews-E391 to Master File
La-Z-Boy-Reviews-E391 has 824 Reviews
Total Reviews: 267609
Adding Cubic-Reviews-E829 to Master File
Cubic-Reviews-E829 has 815 Reviews
Total Reviews: 268424
Adding Commvault-Reviews-E16184 to Master File
Commvault-Reviews-E16184 has 799 Reviews
Total Reviews: 269223
Adding Moog-Inc-Reviews-E950 to Master File
Moog-Inc-Reviews-E950 has 814 Reviews
Total Reviews: 270037
Adding Ebix-Reviews-E1328 to Master File
Ebix-Reviews-E1328 has 800 Reviews
Total Reviews: 270837
Adding Neustar-Reviews-E13026 to Master File
Neustar-Reviews-E13026 has 794 Reviews
Total Reviews: 271631
Adding Occidental-Petroleum-Reviews-E491 to Master File
Occidental-Petroleum-Reviews-E491 has 785 Reviews
Total Reviews: 272

Total Reviews: 316962
Adding Essex-Property-Trust-Reviews-E3630 to Master File
Essex-Property-Trust-Reviews-E3630 has 640 Reviews
Total Reviews: 317602
Adding AES-Corporation-Reviews-E291 to Master File
AES-Corporation-Reviews-E291 has 644 Reviews
Total Reviews: 318246
Adding Teleflex-Reviews-E138296 to Master File
Teleflex-Reviews-E138296 has 653 Reviews
Total Reviews: 318899
Adding Timken-Reviews-E664 to Master File
Timken-Reviews-E664 has 644 Reviews
Total Reviews: 319543
Adding Plexus-Reviews-E1792 to Master File
Plexus-Reviews-E1792 has 643 Reviews
Total Reviews: 320186
Adding Valero-Energy-Reviews-E704 to Master File
Valero-Energy-Reviews-E704 has 638 Reviews
Total Reviews: 320824
Adding Camden-Property-Trust-Reviews-E2418 to Master File
Camden-Property-Trust-Reviews-E2418 has 631 Reviews
Total Reviews: 321455
Adding Pediatrix-Reviews-E4506 to Master File
Pediatrix-Reviews-E4506 has 636 Reviews
Total Reviews: 322091
Adding East-West-Bank-Reviews-E9089 to Master File
East-West-Ban

Total Reviews: 357718
Adding UltraTech-Cement-Reviews-E434272 to Master File
UltraTech-Cement-Reviews-E434272 has 526 Reviews
Total Reviews: 358244
Adding Woodward-Reviews-E6676 to Master File
Woodward-Reviews-E6676 has 507 Reviews
Total Reviews: 358751
Adding WaFd-Bank-Reviews-E2025 to Master File
WaFd-Bank-Reviews-E2025 has 511 Reviews
Total Reviews: 359262
Adding TiVo-Reviews-E9368 to Master File
TiVo-Reviews-E9368 has 507 Reviews
Total Reviews: 359769
Adding Aerojet-Rocketdyne-Reviews-E8468 to Master File
Aerojet-Rocketdyne-Reviews-E8468 has 513 Reviews
Total Reviews: 360282
Adding CorVel-Reviews-E1658 to Master File
CorVel-Reviews-E1658 has 509 Reviews
Total Reviews: 360791
Adding XO-Communications-Reviews-E7057 to Master File
XO-Communications-Reviews-E7057 has 501 Reviews
Total Reviews: 361292
Adding Symantec-Reviews-E3308959 to Master File
Symantec-Reviews-E3308959 has 510 Reviews
Total Reviews: 361802
Adding Hormel-Reviews-E877 to Master File
Hormel-Reviews-E877 has 508 Review

Total Reviews: 391153
Adding Valley-Bank-Reviews-E2007 to Master File
Valley-Bank-Reviews-E2007 has 416 Reviews
Total Reviews: 391569
Adding Amphenol-Reviews-E1967 to Master File
Amphenol-Reviews-E1967 has 398 Reviews
Total Reviews: 391967
Adding Digital-Realty-Reviews-E36929 to Master File
Digital-Realty-Reviews-E36929 has 397 Reviews
Total Reviews: 392364
Adding Ball-Corporation-Reviews-E74 to Master File
Ball-Corporation-Reviews-E74 has 409 Reviews
Total Reviews: 392773
Adding FMC-Reviews-E239 to Master File
FMC-Reviews-E239 has 395 Reviews
Total Reviews: 393168
Adding Teradyne-Reviews-E647 to Master File
Teradyne-Reviews-E647 has 409 Reviews
Total Reviews: 393577
Adding Merit-Medical-Systems-Reviews-E2079 to Master File
Merit-Medical-Systems-Reviews-E2079 has 403 Reviews
Total Reviews: 393980
Adding LL-Flooring-Reviews-E38603 to Master File
LL-Flooring-Reviews-E38603 has 402 Reviews
Total Reviews: 394382
Adding PPL-Reviews-E520 to Master File
PPL-Reviews-E520 has 393 Reviews
Total 

Total Reviews: 415917
Adding Prosperity-Bank-Reviews-E112985 to Master File
Prosperity-Bank-Reviews-E112985 has 324 Reviews
Total Reviews: 416241
Adding Avient-Reviews-E12620 to Master File
Avient-Reviews-E12620 has 322 Reviews
Total Reviews: 416563
Adding Domtar-Reviews-E5883 to Master File
Domtar-Reviews-E5883 has 314 Reviews
Total Reviews: 416877
Adding Silicon-Labs-Reviews-E9122 to Master File
Silicon-Labs-Reviews-E9122 has 330 Reviews
Total Reviews: 417207
Adding Tumi-Reviews-E20782 to Master File
Tumi-Reviews-E20782 has 314 Reviews
Total Reviews: 417521
Adding Emergent-BioSolutions-Reviews-E9794 to Master File
Emergent-BioSolutions-Reviews-E9794 has 316 Reviews
Total Reviews: 417837
Adding PharMerica-Reviews-E894 to Master File
PharMerica-Reviews-E894 has 312 Reviews
Total Reviews: 418149
Adding Pioneer-Natural-Resources-Reviews-E3571 to Master File
Pioneer-Natural-Resources-Reviews-E3571 has 308 Reviews
Total Reviews: 418457
Adding Interpublic-Group-Reviews-E358 to Master File
I

Total Reviews: 436527
Adding First-Midwest-Bank-Reviews-E1415 to Master File
First-Midwest-Bank-Reviews-E1415 has 265 Reviews
Total Reviews: 436792
Adding Simmons-Bank-Reviews-E1940 to Master File
Simmons-Bank-Reviews-E1940 has 264 Reviews
Total Reviews: 437056
Adding Lumentum-Operations-Reviews-E1053151 to Master File
Lumentum-Operations-Reviews-E1053151 has 270 Reviews
Total Reviews: 437326
Adding HealthStream-Reviews-E10008 to Master File
HealthStream-Reviews-E10008 has 275 Reviews
Total Reviews: 437601
Adding Tupperware-Reviews-E5798 to Master File
Tupperware-Reviews-E5798 has 261 Reviews
Total Reviews: 437862
Adding SPX-Flow-Reviews-E1077274 to Master File
SPX-Flow-Reviews-E1077274 has 256 Reviews
Total Reviews: 438118
Adding iRobot-Reviews-E35096 to Master File
iRobot-Reviews-E35096 has 268 Reviews
Total Reviews: 438386
Adding Marten-Transport-Reviews-E1621 to Master File
Marten-Transport-Reviews-E1621 has 259 Reviews
Total Reviews: 438645
Adding DXP-Enterprises-Reviews-E7693 to 

Total Reviews: 453325
Adding Eaton-Vance-Reviews-E3553 to Master File
Eaton-Vance-Reviews-E3553 has 196 Reviews
Total Reviews: 453521
Adding CONSOL-Energy-Reviews-E2751 to Master File
CONSOL-Energy-Reviews-E2751 has 200 Reviews
Total Reviews: 453721
Adding F-N-B-Corporation-Reviews-E4832 to Master File
F-N-B-Corporation-Reviews-E4832 has 190 Reviews
Total Reviews: 453911
Adding Knowles-Corporation-Reviews-E25451 to Master File
Knowles-Corporation-Reviews-E25451 has 197 Reviews
Total Reviews: 454108
Adding Greif-Reviews-E5368 to Master File
Greif-Reviews-E5368 has 202 Reviews
Total Reviews: 454310
Adding MKS-Instruments-Reviews-E7469 to Master File
MKS-Instruments-Reviews-E7469 has 204 Reviews
Total Reviews: 454514
Adding Matthews-International-Reviews-E2695 to Master File
Matthews-International-Reviews-E2695 has 195 Reviews
Total Reviews: 454709
Adding Northwest-Bank-Reviews-E333433 to Master File
Northwest-Bank-Reviews-E333433 has 200 Reviews
Total Reviews: 454909
Adding Rambus-Review

Total Reviews: 465521
Adding Xylem-Inc-Reviews-E2612822 to Master File
Xylem-Inc-Reviews-E2612822 has 146 Reviews
Total Reviews: 465667
Adding Quanta-Services-Reviews-E7594 to Master File
Quanta-Services-Reviews-E7594 has 145 Reviews
Total Reviews: 465812
Adding Darling-Ingredients-Reviews-E2759 to Master File
Darling-Ingredients-Reviews-E2759 has 145 Reviews
Total Reviews: 465957
Adding Great-Western-Bancorp-Reviews-E23341 to Master File
Great-Western-Bancorp-Reviews-E23341 has 144 Reviews
Total Reviews: 466101
Adding Comfort-Systems-USA-Reviews-E10050 to Master File
Comfort-Systems-USA-Reviews-E10050 has 147 Reviews
Total Reviews: 466248
Adding Clearwater-Paper-Reviews-E229811 to Master File
Clearwater-Paper-Reviews-E229811 has 143 Reviews
Total Reviews: 466391
Adding MiMedx-Group-Reviews-E283952 to Master File
MiMedx-Group-Reviews-E283952 has 146 Reviews
Total Reviews: 466537
Adding Walker-and-Dunlop-Reviews-E354637 to Master File
Walker-and-Dunlop-Reviews-E354637 has 149 Reviews
To

Total Reviews: 474076
Adding Cantel-Medical-Reviews-E2648 to Master File
Cantel-Medical-Reviews-E2648 has 103 Reviews
Total Reviews: 474179
Adding CoreSite-Reviews-E354579 to Master File
CoreSite-Reviews-E354579 has 109 Reviews
Total Reviews: 474288
Adding Impax-Laboratories-Reviews-E5676 to Master File
Impax-Laboratories-Reviews-E5676 has 109 Reviews
Total Reviews: 474397
Adding First-Financial-Bank-Texas-Reviews-E2563 to Master File
First-Financial-Bank-Texas-Reviews-E2563 has 104 Reviews
Total Reviews: 474501
Adding U-S-Silica-Reviews-E15082 to Master File
U-S-Silica-Reviews-E15082 has 102 Reviews
Total Reviews: 474603
Adding Barnes-Group-Reviews-E80 to Master File
Barnes-Group-Reviews-E80 has 109 Reviews
Total Reviews: 474712
Adding Gentherm-Reviews-E824986 to Master File
Gentherm-Reviews-E824986 has 111 Reviews
Total Reviews: 474823
Adding Stamps-com-Reviews-E8800 to Master File
Stamps-com-Reviews-E8800 has 104 Reviews
Total Reviews: 474927
Adding Badger-Meter-Reviews-E777 to Mast

Total Reviews: 480840
Adding Sompo-International-Holdings-Reviews-E3202401 to Master File
Sompo-International-Holdings-Reviews-E3202401 has 80 Reviews
Total Reviews: 480920
Adding First-Niagara-Financial-Reviews-E7595 to Master File
First-Niagara-Financial-Reviews-E7595 has 78 Reviews
Total Reviews: 480998
Adding WGL-Reviews-E268948 to Master File
WGL-Reviews-E268948 has 77 Reviews
Total Reviews: 481075
Adding We-Are-Alexander-Reviews-E1469823 to Master File
We-Are-Alexander-Reviews-E1469823 has 74 Reviews
Total Reviews: 481149
Adding PS-Business-Parks-Reviews-E6767 to Master File
File Not Found: PS-Business-Parks-Reviews-E6767
Adding Mueller-Industries-Reviews-E823 to Master File
Mueller-Industries-Reviews-E823 has 73 Reviews
Total Reviews: 481222
Adding Citizens-Business-Bank-Reviews-E634580 to Master File
Citizens-Business-Bank-Reviews-E634580 has 82 Reviews
Total Reviews: 481304
Adding TopBuild-Reviews-E1092703 to Master File
TopBuild-Reviews-E1092703 has 72 Reviews
Total Reviews: 

Total Reviews: 485424
Adding Installed-Building-Products-Reviews-E389453 to Master File
Installed-Building-Products-Reviews-E389453 has 56 Reviews
Total Reviews: 485480
Adding Rayonier-Advanced-Materials-Reviews-E2342058 to Master File
Rayonier-Advanced-Materials-Reviews-E2342058 has 60 Reviews
Total Reviews: 485540
Adding Dime-Community-Bank-Reviews-E6163 to Master File
Dime-Community-Bank-Reviews-E6163 has 58 Reviews
Total Reviews: 485598
Adding Hawkins-Reviews-E1478 to Master File
Hawkins-Reviews-E1478 has 61 Reviews
Total Reviews: 485659
Adding SunCoke-Energy-Reviews-E305808 to Master File
SunCoke-Energy-Reviews-E305808 has 55 Reviews
Total Reviews: 485714
Adding Century-Aluminum-Reviews-E5383 to Master File
Century-Aluminum-Reviews-E5383 has 60 Reviews
Total Reviews: 485774
Adding United-Insurance-Group-Reviews-E321622 to Master File
United-Insurance-Group-Reviews-E321622 has 55 Reviews
Total Reviews: 485829
Adding Pioneer-Energy-Services-Reviews-E725601 to Master File
Pioneer-Ene

Total Reviews: 488502
Adding MB-Technology-Solutions-Reviews-E741239 to Master File
MB-Technology-Solutions-Reviews-E741239 has 35 Reviews
Total Reviews: 488537
Adding Community-Bank-System-Reviews-E1287 to Master File
Community-Bank-System-Reviews-E1287 has 34 Reviews
Total Reviews: 488571
Adding Universal-Forest-Products-Eastern-Division-Reviews-E292253 to Master File
Universal-Forest-Products-Eastern-Division-Reviews-E292253 has 39 Reviews
Total Reviews: 488610
Adding Headwaters-Reviews-E8339 to Master File
Headwaters-Reviews-E8339 has 37 Reviews
Total Reviews: 488647
Adding Fabrinet-Reviews-E29856 to Master File
Fabrinet-Reviews-E29856 has 37 Reviews
Total Reviews: 488684
Adding Astec-Industries-Reviews-E1146 to Master File
Astec-Industries-Reviews-E1146 has 39 Reviews
Total Reviews: 488723
Adding Navigators-Reviews-E1688 to Master File
Navigators-Reviews-E1688 has 40 Reviews
Total Reviews: 488763
Adding Bob-Evans-Farms-Reviews-E2128067 to Master File
Bob-Evans-Farms-Reviews-E21280

Total Reviews: 490412
Adding Host-Hotels-and-Resorts-Reviews-E305 to Master File
Host-Hotels-and-Resorts-Reviews-E305 has 16 Reviews
Total Reviews: 490428
Adding Harris-Harris-Reviews-E1267523 to Master File
Harris-Harris-Reviews-E1267523 has 14 Reviews
Total Reviews: 490442
Adding Tesoro-Reviews-E4058484 to Master File
Tesoro-Reviews-E4058484 has 12 Reviews
Total Reviews: 490454
Adding Affiliated-Managers-Group-Reviews-E7172 to Master File
Affiliated-Managers-Group-Reviews-E7172 has 17 Reviews
Total Reviews: 490471
Adding Pinnacle-West-Reviews-E534 to Master File
Pinnacle-West-Reviews-E534 has 19 Reviews
Total Reviews: 490490
Adding WYNDHAM-WORLDWIDE-OPERATIONS-Reviews-E3271307 to Master File
WYNDHAM-WORLDWIDE-OPERATIONS-Reviews-E3271307 has 18 Reviews
Total Reviews: 490508
Adding Fluor-Corporation-Reviews-E5050687 to Master File
Fluor-Corporation-Reviews-E5050687 has 17 Reviews
Total Reviews: 490525
Adding Scripps-Networks-Interactive-Reviews-E3994230 to Master File
Scripps-Networks-

Total Reviews: 491234
Adding PTC-TX-Reviews-E5003588 to Master File
PTC-TX-Reviews-E5003588 has 2 Reviews
Total Reviews: 491236
Adding Arris-Composites-Reviews-E2591441 to Master File
Arris-Composites-Reviews-E2591441 has 10 Reviews
Total Reviews: 491246
Adding IDACORP-Reviews-E347 to Master File
IDACORP-Reviews-E347 has 3 Reviews
Total Reviews: 491249
Adding Genesee-Reviews-E2288105 to Master File
Genesee-Reviews-E2288105 has 7 Reviews
Total Reviews: 491256
Adding Hawaiian-Electric-Industries-Reviews-E320 to Master File
Hawaiian-Electric-Industries-Reviews-E320 has 3 Reviews
Total Reviews: 491259
Adding Webster-Financial-Reviews-E2032 to Master File
Webster-Financial-Reviews-E2032 has 10 Reviews
Total Reviews: 491269
Adding Thor-Industries-Reviews-E661 to Master File
Thor-Industries-Reviews-E661 has 9 Reviews
Total Reviews: 491278
Adding Crane-Crane-Reviews-E7578716 to Master File
Crane-Crane-Reviews-E7578716 has 1 Reviews
Total Reviews: 491279
Adding Urban-Edge-Properties-Reviews-E12

Total Reviews: 491532
Adding REX-Stores-Reviews-E2657 to Master File
REX-Stores-Reviews-E2657 has 5 Reviews
Total Reviews: 491537
Adding Enanta-Pharmaceuticals-Reviews-E261179 to Master File
Enanta-Pharmaceuticals-Reviews-E261179 has 10 Reviews
Total Reviews: 491547
Adding Celadon-Art-Reviews-E1547821 to Master File
Celadon-Art-Reviews-E1547821 has 8 Reviews
Total Reviews: 491555
Adding Northern-Oil-and-Gas-Reviews-E1740141 to Master File
Northern-Oil-and-Gas-Reviews-E1740141 has 2 Reviews
Total Reviews: 491557
Adding FutureFuel-Reviews-E354194 to Master File
FutureFuel-Reviews-E354194 has 10 Reviews
Total Reviews: 491567
Adding Contango-Reviews-E13157 to Master File
Contango-Reviews-E13157 has 5 Reviews
Total Reviews: 491572
Adding Kopin-Reviews-E2168 to Master File
Kopin-Reviews-E2168 has 9 Reviews
Total Reviews: 491581
Reached the End of the Companies, Outputting File to data/glassdoor_companies_2.csv


In [122]:
# Show the review-page URLs for which no CSV was found under ./data/companies/
# (the list carries over entries from run 1 because it was passed into run 2).
missing_companies_3
# Here are all the companies we are missing

['https://www.glassdoor.com/Reviews/Ross-Stores-Reviews-E1843.htm',
 'https://www.glassdoor.com/Reviews/FIS-Reviews-E313114.htm',
 'https://www.glassdoor.com/Reviews/FedEx-Reviews-E246.htm',
 'https://www.glassdoor.com/Reviews/PNC-Financial-Services-Group-Reviews-E507.htm',
 'https://www.glassdoor.com/Reviews/Abercrombie-and-Fitch-Reviews-E6756.htm',
 'https://www.glassdoor.com/Reviews/General-Motors-GM-Reviews-E279.htm',
 'https://www.glassdoor.com/Reviews/Labcorp-Reviews-E1679.htm',
 'https://www.glassdoor.com/Reviews/Virtusa-Reviews-E29396.htm',
 'https://www.glassdoor.com/Reviews/Sherwin-Williams-Reviews-E599.htm',
 'https://www.glassdoor.com/Reviews/The-Hartford-Reviews-E4314.htm',
 'https://www.glassdoor.com/Reviews/SunTrust-Reviews-E631.htm',
 'https://www.glassdoor.com/Reviews/Whirlpool-Corporation-Reviews-E732.htm',
 'https://www.glassdoor.com/Reviews/Darden-Reviews-E4160.htm',
 'https://www.glassdoor.com/Reviews/Brookfield-Properties-retail-group-Reviews-E2317.htm',
 'https:/

### Read in Ticker and ISIN Data

In [136]:
# Read in data from data/2022 SP1500 Glassdoor Links.xlsx
# Isolate Ticker, ISIN, Glassdoor Review Page Nonhyperlink
# Read in Companies Data Created Above
# Merge on the Glassdoor Review Page = Glassdoor Review Page Nonhyperlink

def merge_ticker_issn(file_num):
    """Add Ticker and ISIN columns to one combined Glassdoor CSV, in place.

    Loads ``./data/glassdoor_companies_<file_num>.csv`` and the
    ``Ticker`` / ``ISIN`` / ``Glassdoor Review Page - non Hyperlink`` columns
    of ``./data/2022 SP1500 Glassdoor Links.xlsx``, left-joins them on the
    review-page URL, drops the duplicate URL column, and overwrites the CSV.

    Args:
        file_num: Suffix of the combined CSV to update.

    Returns:
        None. The result is written back to the same CSV path.
    """
    csv_path = "./data/glassdoor_companies_" + str(file_num) + ".csv"
    link_column = 'Glassdoor Review Page - non Hyperlink'

    print("Loading Files")
    companies = pd.read_csv(csv_path)
    ticker_data = pd.read_excel("./data/2022 SP1500 Glassdoor Links.xlsx")[['Ticker', 'ISIN', link_column]]
    print("Files are loaded. Now merging.")
    # Left join keeps every review row even when its URL has no ticker entry.
    # NOTE(review): a URL listed more than once in the workbook would repeat
    # the matching review rows - confirm the link column is unique.
    merged = pd.merge(companies, ticker_data,
                      left_on = 'Glassdoor Review Page', right_on = link_column,
                      how = 'left').drop(link_column, axis=1)
    print("Files are merged. Now updating CSV.")
    # index=False: the CSV is read back without index_col, so writing the
    # index here would add one more unnamed column on every pass.
    # NOTE(review): running this twice on the same file will still produce
    # suffixed Ticker/ISIN columns, since they already exist after one merge.
    merged.to_csv(csv_path, index=False)
    print("File has been updated")
    
# Update combined files 1 through 3 in turn.
# NOTE(review): only files 1 and 2 are written by the combine_files runs
# visible here; file 3 presumably comes from another step - confirm.
for i in range(1, 4):
    merge_ticker_issn(i)

Loading Files
Files are loaded. Now merging.
Files are merged. Now updating CSV.
File has been updated
Loading Files




Files are loaded. Now merging.
Files are merged. Now updating CSV.
File has been updated
Loading Files
Files are loaded. Now merging.
Files are merged. Now updating CSV.
File has been updated
