In [1]:
import pandas as pd
import numpy as np


In [2]:
df = pd.read_csv("consumer_complaints.csv",low_memory= False)
df.head(3)

Unnamed: 0,date_received,product,sub_product,issue,sub_issue,consumer_complaint_narrative,company_public_response,company,state,zipcode,tags,consumer_consent_provided,submitted_via,date_sent_to_company,company_response_to_consumer,timely_response,consumer_disputed?,complaint_id
0,08/30/2013,Mortgage,Other mortgage,"Loan modification,collection,foreclosure",,,,U.S. Bancorp,CA,95993,,,Referral,09/03/2013,Closed with explanation,Yes,Yes,511074
1,08/30/2013,Mortgage,Other mortgage,"Loan servicing, payments, escrow account",,,,Wells Fargo & Company,CA,91104,,,Referral,09/03/2013,Closed with explanation,Yes,Yes,511080
2,08/30/2013,Credit reporting,,Incorrect information on credit report,Account status,,,Wells Fargo & Company,NY,11764,,,Postal mail,09/18/2013,Closed with explanation,Yes,No,510473


In [3]:
df.columns

Index(['date_received', 'product', 'sub_product', 'issue', 'sub_issue',
       'consumer_complaint_narrative', 'company_public_response', 'company',
       'state', 'zipcode', 'tags', 'consumer_consent_provided',
       'submitted_via', 'date_sent_to_company', 'company_response_to_consumer',
       'timely_response', 'consumer_disputed?', 'complaint_id'],
      dtype='object')



- 'date_received': The date when the consumer complaint was received.
- 'product': The type of product or service associated with the consumer complaint.
- 'sub_product': Further categorization or sub-type of the product or service.
- 'issue': The main issue or problem reported by the consumer.
- 'sub_issue': Further categorization or sub-type of the issue.
- 'consumer_complaint_narrative': A description or narrative provided by the consumer explaining their complaint.
- 'company_public_response': The response or statement made by the company in public regarding the complaint.
- 'company': The name of the company involved in the complaint.
- 'state': The state where the consumer resides.
- 'zipcode': The postal code or ZIP code of the consumer's location.
- 'tags': Any additional tags or labels associated with the complaint.
- 'consumer_consent_provided': Indicates whether the consumer provided consent for the complaint to be shared publicly.
- 'submitted_via': The method or channel through which the complaint was submitted.
- 'date_sent_to_company': The date when the complaint was sent to the company for resolution.
- 'company_response_to_consumer': The response provided by the company to address the consumer's complaint.
- 'timely_response': Indicates whether the company responded to the complaint in a timely manner.
- 'consumer_disputed?': Indicates whether the consumer disputed the company's response.
- 'complaint_id': A unique identifier assigned to each consumer complaint.

In [4]:
df = df.drop(['tags', 'consumer_consent_provided'], axis=1)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 555957 entries, 0 to 555956
Data columns (total 16 columns):
 #   Column                        Non-Null Count   Dtype 
---  ------                        --------------   ----- 
 0   date_received                 555957 non-null  object
 1   product                       555957 non-null  object
 2   sub_product                   397635 non-null  object
 3   issue                         555957 non-null  object
 4   sub_issue                     212622 non-null  object
 5   consumer_complaint_narrative  66806 non-null   object
 6   company_public_response       85124 non-null   object
 7   company                       555957 non-null  object
 8   state                         551070 non-null  object
 9   zipcode                       551452 non-null  object
 10  submitted_via                 555957 non-null  object
 11  date_sent_to_company          555957 non-null  object
 12  company_response_to_consumer  555957 non-null  object
 13 

In [6]:
df2 = df.replace(np.nan, 'null', regex=True)
print(df2)

       date_received           product                  sub_product   
0         08/30/2013          Mortgage               Other mortgage  \
1         08/30/2013          Mortgage               Other mortgage   
2         08/30/2013  Credit reporting                         null   
3         08/30/2013      Student loan     Non-federal student loan   
4         08/30/2013   Debt collection                  Credit card   
...              ...               ...                          ...   
555952    07/01/2014          Mortgage               Other mortgage   
555953    07/01/2014          Mortgage               Other mortgage   
555954    07/10/2012          Mortgage  Conventional fixed mortgage   
555955    04/14/2015   Debt collection                I do not know   
555956    08/14/2014   Debt collection                I do not know   

                                           issue   
0       Loan modification,collection,foreclosure  \
1       Loan servicing, payments, escrow ac

In [7]:
df2 = df2.drop(['consumer_complaint_narrative','company_public_response'], axis=1)

In [8]:
df2.replace(np.nan, 'Null', inplace=True)

In [9]:
df2.head(10)

Unnamed: 0,date_received,product,sub_product,issue,sub_issue,company,state,zipcode,submitted_via,date_sent_to_company,company_response_to_consumer,timely_response,consumer_disputed?,complaint_id
0,08/30/2013,Mortgage,Other mortgage,"Loan modification,collection,foreclosure",,U.S. Bancorp,CA,95993,Referral,09/03/2013,Closed with explanation,Yes,Yes,511074
1,08/30/2013,Mortgage,Other mortgage,"Loan servicing, payments, escrow account",,Wells Fargo & Company,CA,91104,Referral,09/03/2013,Closed with explanation,Yes,Yes,511080
2,08/30/2013,Credit reporting,,Incorrect information on credit report,Account status,Wells Fargo & Company,NY,11764,Postal mail,09/18/2013,Closed with explanation,Yes,No,510473
3,08/30/2013,Student loan,Non-federal student loan,Repaying your loan,Repaying your loan,"Navient Solutions, Inc.",MD,21402,Email,08/30/2013,Closed with explanation,Yes,Yes,510326
4,08/30/2013,Debt collection,Credit card,False statements or representation,Attempted to collect wrong amount,Resurgent Capital Services L.P.,GA,30106,Web,08/30/2013,Closed with explanation,Yes,Yes,511067
5,08/30/2013,Credit card,,Application processing delay,,Capital One,NY,12206,Phone,09/03/2013,Closed with explanation,Yes,Yes,510098
6,08/30/2013,Credit card,,Credit line increase/decrease,,Wells Fargo & Company,AZ,85730,Postal mail,09/05/2013,Closed with explanation,Yes,No,511062
7,08/30/2013,Bank account or service,Checking account,Deposits and withdrawals,,Bank of America,IL,60660,Referral,09/04/2013,Closed with explanation,Yes,No,511116
8,08/30/2013,Bank account or service,Checking account,Deposits and withdrawals,,Bank of America,GA,30016,Referral,09/04/2013,Closed with explanation,Yes,No,511091
9,09/17/2013,Mortgage,Conventional adjustable mortgage (ARM),"Loan modification,collection,foreclosure",,"SunTrust Banks, Inc.",CA,94551,Web,09/18/2013,Closed with explanation,Yes,Yes,530602


In [11]:
df2 = df2.to_csv('file2.csv')