In [13]:
import pandas as pd
import os
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from bs4 import BeautifulSoup
from statistics import mean 

## Personal Loan Companies Analyzed  
1) Upstart: https://www.creditkarma.com/reviews/personal-loan/single/id/upstart-personal-loans   
2) LendingPoint: https://www.creditkarma.com/reviews/personal-loan/single/id/lending-point-personal-loans  
3) Marcus by Goldman Sachs: https://www.creditkarma.com/reviews/personal-loan/single/id/goldmansachs  
4) Prosper Personal Loans: https://www.creditkarma.com/reviews/personal-loan/single/id/prosper-personal-loans  
5) Upgrade: https://www.creditkarma.com/reviews/personal-loan/single/id/upgrade_personal  
6) Best Egg: https://www.creditkarma.com/reviews/personal-loan/single/id/BestEgg  
7) OneMain Financial Personal Loans: https://www.creditkarma.com/reviews/personal-loan/single/id/onemain-financial-personal-loans  

### Data Cleaning
Load each company's scraped review CSV from `./data` and strip any HTML tags from the review text.

In [14]:
# Load every scraped review CSV in the data directory and strip HTML from the
# review text. Produces three notebook-level names used by later cells:
#   posts       - list of DataFrames, one per CSV file
#   names       - list of the matching CSV filenames (also the text column name)
#   total_posts - total number of review rows loaded
DATA_DIR = "./data"
EXCLUDED_FILES = {"lc_accepted.csv"}  # explicitly left out of the review analysis


def _strip_html(text):
    """Return the visible text of an HTML fragment; missing values become ""."""
    # pandas reads empty cells as NaN (a float), which BeautifulSoup cannot parse.
    if pd.isna(text):
        return ""
    return BeautifulSoup(str(text), "lxml").get_text()


posts = []
names = []
total_posts = 0
# os.listdir returns entries in arbitrary order; sort so re-runs are reproducible.
for filename in sorted(os.listdir(DATA_DIR)):
    if not filename.endswith(".csv") or filename in EXCLUDED_FILES:
        continue
    # skiprows=[1] drops the first line after the header.
    # NOTE(review): presumably a non-review row in the scraped files - confirm.
    post = pd.read_csv(os.path.join(DATA_DIR, filename), skiprows=[1])
    if "text" not in post.columns:
        # Fail here with the offending file name instead of a bare KeyError later.
        raise KeyError("{} has no 'text' column".format(filename))
    # Name the review column after its source file so each frame is self-describing.
    post = post.rename(columns={"text": filename})
    post[filename] = [_strip_html(text) for text in post[filename]]
    total_posts += len(post)
    names.append(filename)
    posts.append(post)

print("Total Posts:", total_posts)

Total Posts: 2084


### Data analysis
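Apply the VADER toolkit to each review and compute every company's cumulative positive sentiment, i.e. the sum of the `pos` scores over all of its reviews. Because this is a sum rather than a mean, companies with more reviews tend to score higher.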

In [15]:
# Score every cleaned review with VADER.
# scores_map: CSV filename -> list with one polarity_scores() result per review
# in that file, in the same order as the rows of the frame.
analyzer = SentimentIntensityAnalyzer()
scores_map = {
    source_name: [analyzer.polarity_scores(review) for review in frame[source_name]]
    for frame, source_name in zip(posts, names)
}

In [16]:
# Sum the VADER "pos" score over all reviews of each company.
# The totals go into a new dict instead of overwriting scores_map, so this cell
# can be re-run without restarting the kernel (the per-review score lists that
# the loop iterates over stay intact).
positive_totals = {
    company: sum(score["pos"] for score in scores)
    for company, scores in scores_map.items()
}

# ranks: company -> cumulative positive score, ordered from lowest to highest.
ranks = dict(sorted(positive_totals.items(), key=lambda item: item[1]))
# avg_rank is the mean cumulative score across companies (not a mean rank position).
# It is NaN when no company was loaded, rather than raising ZeroDivisionError.
avg_rank = sum(ranks.values()) / len(ranks) if ranks else float("nan")

In [45]:
# Build the summary table in one construction instead of growing an empty frame
# row by row, so column dtypes come from the data rather than from enlargement.
SENTIMENT_COL = "Cumulative Positive VADER Sentiment"
company_count = len(ranks)
sent_df = pd.DataFrame(
    list(ranks.items()),
    columns=["Company", SENTIMENT_COL],
    # Index labels 1..N follow the iteration order of `ranks`.
    index=range(1, company_count + 1),
).sort_values(by=SENTIMENT_COL, ascending=False)
# Append the cross-company average after sorting so it stays as the last row.
sent_df.loc[company_count + 1] = ["Average", avg_rank]
sent_df

Unnamed: 0,Company,Cumulative Positive VADER Sentiment
7,upstart_reviews.csv,235.588
6,upgrade_reviews.csv,87.393
5,onemain_reviews.csv,75.5
4,prosper_reviews.csv,55.836
3,best_egg_reviews.csv,52.732
2,lendingpoint_reviews.csv,26.514
1,marcus_reviews.csv,19.436
8,Average,78.999857
