In [31]:
import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
from nltk import word_tokenize, FreqDist
from nltk.stem.wordnet import WordNetLemmatizer
import string
import plotly
import plotly.graph_objects as go
import plotly.express as px
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer

In [32]:
# Read the complaints dataset from the working directory.
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk, which is what produces mixed-type columns and the
# accompanying DtypeWarning.
# NOTE(review): the saved output of this cell echoes the read_csv line, which
# looks like the tail of such a DtypeWarning - confirm against a fresh run.
df = pd.read_csv('complaints.csv', low_memory=False)

  df = pd.read_csv('complaints.csv')


In [34]:
# Display the data frame as loaded, before any cleanup is applied.
df

Unnamed: 0,Date received,Product,Sub-product,Issue,Sub-issue,Consumer complaint narrative,Company public response,Company,State,ZIP code,Tags,Consumer consent provided?,Submitted via,Date sent to company,Company response to consumer,Timely response?,Consumer disputed?,Complaint ID
0,2022-11-22,"Credit reporting, credit repair services, or o...",Credit reporting,Improper use of your report,Reporting company used your report improperly,,,"EQUIFAX, INC.",OK,74447.0,Servicemember,,Web,2022-11-22,In progress,Yes,,6231498
1,2022-11-16,"Credit reporting, credit repair services, or o...",Credit reporting,Problem with a credit reporting company's inve...,Their investigation did not fix an error on yo...,,,Experian Information Solutions Inc.,NJ,7093.0,,,Web,2022-11-16,In progress,Yes,,6205383
2,2022-11-09,Debt collection,Credit card debt,Took or threatened to take negative or legal a...,Threatened or suggested your credit would be d...,,,"Genesis FS Card Services, Inc.",TX,75038.0,,,Web,2022-11-09,Closed with explanation,Yes,,6182699
3,2022-11-07,Mortgage,Other type of mortgage,Trouble during payment process,,,,NORTHERN OHIO INVESTMENT COMPANY,MO,63031.0,,Other,Web,2022-11-08,Untimely response,No,,6173945
4,2022-11-07,Debt collection,Other debt,Attempts to collect debt not owed,Debt is not yours,,,"R & R Collection Service, Inc.",MO,64154.0,,,Phone,2022-11-07,Untimely response,No,,6175998
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3096751,2017-02-09,Debt collection,I do not know,Cont'd attempts collect debt not owed,Debt resulted from identity theft,I have disputed my debts several times with no...,,Bonneville Billing and Collections,UT,84054.0,Servicemember,Consent provided,Web,2017-02-09,Closed with explanation,Yes,No,2334969
3096752,2015-04-29,Mortgage,Conventional fixed mortgage,"Loan modification,collection,foreclosure",,My father died in XX/XX/XXXX. Left me his only...,,"CITIBANK, N.A.",OK,74066.0,,Consent provided,Web,2015-04-29,Closed with explanation,Yes,No,1352738
3096753,2017-03-31,Credit reporting,,Credit reporting company's investigation,No notice of investigation status/result,cfbp i would Like to file a complaint on Exper...,Company has responded to the consumer and the ...,Experian Information Solutions Inc.,MN,55379.0,,Consent provided,Web,2017-03-31,Closed with non-monetary relief,Yes,Yes,2412926
3096754,2017-01-16,Credit reporting,,Incorrect information on credit report,Account status,My husband and I are in the middle of an FHA S...,Company has responded to the consumer and the ...,"TRANSUNION INTERMEDIATE HOLDINGS, INC.",GA,30215.0,,Consent provided,Web,2017-01-16,Closed with explanation,Yes,No,2292586


##### Since we are only focused on the products and complaint narratives, we will re-create the data frame using only the 'Product' and 'Consumer complaint narrative' attributes.

In [35]:
# Keep only the two columns the rest of the analysis uses.
# The explicit .copy() makes df an independent frame, so later cells that
# modify its columns are not flagged by pandas as writing to a derived
# selection of the original frame (SettingWithCopyWarning).
df = df[['Product' , 'Consumer complaint narrative']].copy()

In [36]:
# Give the narrative column a shorter name for convenience.
# 'Product' is listed in the mapping but maps to itself, so only the
# narrative column's label actually changes.
df = df.rename(
    columns={
        'Product': "Product",
        'Consumer complaint narrative': "Complaint",
    }
)

In [38]:
# Display the data frame after the columns have been renamed.
df

Unnamed: 0,Product,Complaint
0,"Credit reporting, credit repair services, or o...",
1,"Credit reporting, credit repair services, or o...",
2,Debt collection,
3,Mortgage,
4,Debt collection,
...,...,...
3096751,Debt collection,I have disputed my debts several times with no...
3096752,Mortgage,My father died in XX/XX/XXXX. Left me his only...
3096753,Credit reporting,cfbp i would Like to file a complaint on Exper...
3096754,Credit reporting,My husband and I are in the middle of an FHA S...


In [39]:
# Per-product tally: count() reports the number of non-null values in each
# remaining column for every distinct 'Product' label.
df.groupby(by='Product').count()

Unnamed: 0_level_0,Complaint
Product,Unnamed: 1_level_1
Bank account or service,14885
Checking or savings account,54190
Consumer Loan,9470
Credit card,18838
Credit card or prepaid card,81787
Credit reporting,31588
"Credit reporting, credit repair services, or other personal consumer reports",514280
Debt collection,191946
"Money transfer, virtual currency, or money service",26556
Money transfers,1497


##### Many of these products belong to the same category, so they can be merged into a single product category.

In [40]:
#Clean up data by renaming similar products into one category of products.
# Maps each original product label to its consolidated category. Labels that
# are not listed here are left unchanged by Series.replace.
PRODUCT_CATEGORIES = {
    'Bank account or service': 'Banking Services',
    'Checking or savings account' : 'Banking Services',
    'Consumer Loan' : 'Loans',
    'Credit card' : 'Credit/Prepaid Cards',
    'Credit card or prepaid card' : 'Credit/Prepaid Cards',
    'Credit reporting' : 'Credit Reporting and Services',
    'Credit reporting, credit repair services, or other personal consumer reports' : 'Credit Reporting and Services',
    'Debt collection' : 'Debt Collection',
    'Money transfer, virtual currency, or money service' : 'Banking Services',
    'Money transfers' : 'Banking Services',
    'Mortgage' : 'Mortgages',
    'Other financial service' : 'Banking Services',
    'Payday loan' : 'Loans',
    'Payday loan, title loan, or personal loan' : 'Loans',
    'Prepaid card' : 'Credit/Prepaid Cards',
    'Student loan' : 'Loans',
    'Vehicle loan or lease' : 'Loans',
    'Virtual currency' : 'Crypto Currency',
}

# Assign the replaced column back instead of calling
# df['Product'].replace(..., inplace=True). That form mutates a temporary
# Series obtained through chained indexing: pandas 2.2 warns about it, and with
# Copy-on-Write (the default from pandas 3.0) df itself is left unmodified.
# DataFrame.assign returns a new frame with 'Product' overwritten in its
# existing position, so re-running this cell gives the same result.
df = df.assign(Product=df['Product'].replace(PRODUCT_CATEGORIES))

In [42]:
# Tally per consolidated product category: count() reports the number of
# non-null values in each remaining column for every 'Product' label.
df.groupby(by='Product').count()

Unnamed: 0_level_0,Complaint
Product,Unnamed: 1_level_1
Banking Services,97420
Credit Reporting and Services,545868
Credit/Prepaid Cards,102075
Crypto Currency,16
Debt Collection,191946
Loans,77347
Mortgages,97748


In [44]:
# Display the data frame now that 'Product' holds the consolidated categories.
df

Unnamed: 0,Product,Complaint
0,Credit Reporting and Services,
1,Credit Reporting and Services,
2,Debt Collection,
3,Mortgages,
4,Debt Collection,
...,...,...
3096751,Debt Collection,I have disputed my debts several times with no...
3096752,Mortgages,My father died in XX/XX/XXXX. Left me his only...
3096753,Credit Reporting and Services,cfbp i would Like to file a complaint on Exper...
3096754,Credit Reporting and Services,My husband and I are in the middle of an FHA S...
