In [1]:
import pandas as pd
import numpy as np
import csv
import json
import datetime as dt

In [2]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

In [3]:
def json2csv(json_name, csv_name, key):
    with open(json_name) as json_file:
        data = json.load(json_file)
    transaction_data = data[key]
    data_file = open(csv_name, 'w') 
    csv_writer = csv.writer(data_file)
    count = 0
    for tran in transaction_data:
        if count == 0: 
            # Writing headers of CSV file 
            header = tran.keys()
            csv_writer.writerow(header) 
            count += 1
        # Writing data of CSV file 
        csv_writer.writerow(tran.values())
    data_file.close() 

In [4]:
json2csv('shreya_fidata.json', 'fish102.csv', 'fiData')

In [5]:
df = pd.read_csv('fish102.csv')

In [6]:
df

Unnamed: 0,transactionTimestamp,txnId,mode,amount,balance,type,narration,reference,valueDate
0,2019-08-01T019:38:02.000Z,M63132270,FT,150000,539769,CREDIT,salfintechproductsandsolutions August 2019,RFN79769386,2019-08-01
1,2019-08-01T017:27:37.000Z,M61140660,FT,3567,536202,DEBIT,ACH/maxlifeinsurance/978958600/5091047448,RFN54369752,2019-08-01
2,2019-08-01T019:48:36.000Z,M15100170,FT,5000,531202,DEBIT,BIL/BPAY/432994770625/motilaloswal/1771516350,RFN01307576,2019-08-01
3,2019-08-01T016:23:43.000Z,M42486489,FT,9000,522202,DEBIT,BIL/INF/282011610769/principalmutualfund/5014687117,RFN58783355,2019-08-01
4,2019-08-01T015:00:55.000Z,M92803625,FT,11345,510857,DEBIT,MMT/IMPS/974561764045/pnbpersonalloan/axis/SIB,RFN06186493,2019-08-01
5,2019-08-02T016:53:02.000Z,M63655895,FT,50,510807,DEBIT,UPI/506718301363/order/amul/okicici,RFN94034773,2019-08-02
6,2019-08-03T015:32:06.000Z,M96840288,FT,2231,508576,DEBIT,IPS/spencers/810835017872/256145420421/BANGALORE,RFN96822221,2019-08-03
7,2019-08-03T008:30:03.000Z,M51185347,FT,14583,493993,DEBIT,MMT/IMPS/142088569622/idbihomeloan/axis/SBIN,RFN33041124,2019-08-03
8,2019-08-04T013:29:12.000Z,M13469397,FT,12000,481993,DEBIT,MMT/IMPS/215925743911/rent/axis/BLR,RFN11807317,2019-08-04
9,2019-08-04T004:59:09.000Z,M36739380,FT,242,481751,DEBIT,VIN/burgerking/457855649189/729428881661,RFN86024827,2019-08-04


In [7]:
new_df = df.drop(["transactionTimestamp","balance","reference"],axis = 1)

In [8]:
new_df

Unnamed: 0,txnId,mode,amount,type,narration,valueDate
0,M63132270,FT,150000,CREDIT,salfintechproductsandsolutions August 2019,2019-08-01
1,M61140660,FT,3567,DEBIT,ACH/maxlifeinsurance/978958600/5091047448,2019-08-01
2,M15100170,FT,5000,DEBIT,BIL/BPAY/432994770625/motilaloswal/1771516350,2019-08-01
3,M42486489,FT,9000,DEBIT,BIL/INF/282011610769/principalmutualfund/5014687117,2019-08-01
4,M92803625,FT,11345,DEBIT,MMT/IMPS/974561764045/pnbpersonalloan/axis/SIB,2019-08-01
5,M63655895,FT,50,DEBIT,UPI/506718301363/order/amul/okicici,2019-08-02
6,M96840288,FT,2231,DEBIT,IPS/spencers/810835017872/256145420421/BANGALORE,2019-08-03
7,M51185347,FT,14583,DEBIT,MMT/IMPS/142088569622/idbihomeloan/axis/SBIN,2019-08-03
8,M13469397,FT,12000,DEBIT,MMT/IMPS/215925743911/rent/axis/BLR,2019-08-04
9,M36739380,FT,242,DEBIT,VIN/burgerking/457855649189/729428881661,2019-08-04


In [9]:
#Checking is narration string type or not
isinstance(new_df["narration"][0],str)

True

In [10]:
#To find the number of forward slashes in each narration
new_df["narration"][0].count('/')

0

In [11]:
#To find the first occurance index of a substr (works same as .index(), but find() is better because it returns -1 when not found) 
new_df["narration"][0].find('I/')

-1

In [12]:
#To split a string into tuple of 3 parts, part before specified substr, substr and part after substr
tup1 = new_df["narration"][0].partition('/')
#print(tup1)

In [13]:
tup1[0]=="UPI"
type(new_df.shape[0])==int
new_df["narration"].nunique()
type(new_df["narration"].isna().sum())

numpy.int64

In [14]:
#GOOD USEFUL METHOD
#To split the string into many parts based on the separator and return list, it returns original string in list if the separator is not present
li = new_df["narration"][0].split('/')
#print(li)

In [15]:
def gen_narrations(df):
    nan = df["narration"].isna().sum()
    if nan!=0:
        return ("The narration is empty for nan no. of transactions!")
    n = df.shape[0]
    narrations = []
    for i in range(0,n):
        li = [df["txnId"][i]]
        li+=(df["narration"][i].split('/'))
        narrations.append(li)
    return pd.DataFrame(narrations, columns =['txnId','f1','f2','f3','f4','f5','f6'], dtype = str)
    

In [16]:
narrations = gen_narrations(new_df)

In [17]:
narrations

Unnamed: 0,txnId,f1,f2,f3,f4,f5,f6
0,M63132270,salfintechproductsandsolutions August 2019,,,,,
1,M61140660,ACH,maxlifeinsurance,978958600,5091047448,,
2,M15100170,BIL,BPAY,432994770625,motilaloswal,1771516350,
3,M42486489,BIL,INF,282011610769,principalmutualfund,5014687117,
4,M92803625,MMT,IMPS,974561764045,pnbpersonalloan,axis,SIB
5,M63655895,UPI,506718301363,order,amul,okicici,
6,M96840288,IPS,spencers,810835017872,256145420421,BANGALORE,
7,M51185347,MMT,IMPS,142088569622,idbihomeloan,axis,SBIN
8,M13469397,MMT,IMPS,215925743911,rent,axis,BLR
9,M36739380,VIN,burgerking,457855649189,729428881661,,


In [18]:
#Making separate dataframes for specific merchants: These 7 are unique ones
upi_df = narrations[narrations["f1"]=="UPI"]
gib_df = narrations[narrations["f1"]=="GIB"]
vps_df = narrations[narrations["f1"]=="VPS"]
atm_df = narrations[narrations["f1"]=="ATM"]
ach_df = narrations[narrations["f1"]=="ACH"]
mmt_df = narrations[narrations["f1"]=="MMT"]
iin_df = narrations[narrations["f1"]=="IIN"]

In [19]:
upi_df

Unnamed: 0,txnId,f1,f2,f3,f4,f5,f6
5,M63655895,UPI,506718301363,order,amul,okicici,
10,M75996309,UPI,586443110699,order,uber,okhdfcbank,
11,M34495073,UPI,98422640356,order,bescom,ybl,
13,M07733887,UPI,562288631583,order,kwalitybazaar,upi,
14,M78743336,UPI,157657395315,order,ionbroadband,okaxis,
15,M35768082,UPI,618592981121,order,jio,oksbi,
18,M77387267,UPI,671010149775,order,amul,okhdfcbank,
19,M74247698,UPI,803549937513,order,1mg,upi,
23,M43929380,UPI,788562080526,order,amul,upi,
25,M63133976,UPI,395709773838,order,kwalitybazaar,oksbi,


In [20]:
gib_df

Unnamed: 0,txnId,f1,f2,f3,f4,f5,f6


In [21]:
vps_df

Unnamed: 0,txnId,f1,f2,f3,f4,f5,f6
16,M21276964,VPS,veromoda,244841026362,700465092525,BANGALORE,
22,M54067383,VPS,pvr,5335727812,292810391662,BLR,
41,M31855096,VPS,pvr,961673655629,649380358307,bangalore,
45,M67876121,VPS,eatfit,314384298325,302429015818,BANGALORE,
52,M58856524,VPS,pvr,990167312952,905548969125,Begaluru,
85,M30386741,VPS,pvr,780762122776,950523748720,BANGALORE,
119,M92136912,VPS,pvr,792560136235,532262116386,Begaluru,
145,M72404874,VPS,pvr,166866981592,65041562482,BLR,
152,M56914843,VPS,pvr,94940257987,691582210537,Begaluru,
154,M57990095,VPS,h&m,754228021047,720769284641,BANGALOR,


In [22]:
atm_df

Unnamed: 0,txnId,f1,f2,f3,f4,f5,f6
59,M04526987,ATM,GFCEA011,CASH WDL,2019-09-30,,
144,M84535786,ATM,SKBLT883,CASH WDL,2019-12-10,,
297,M32826090,ATM,DADCM718,CASH WDL,2020-05-18,,
301,M94836692,ATM,BESLX088,CASH WDL,2020-05-25,,
319,M94982383,ATM,BDTJA386,CASH WDL,2020-06-08,,
362,M51220268,ATM,OZXHH510,CASH WDL,2020-07-31,,


In [23]:
ach_df

Unnamed: 0,txnId,f1,f2,f3,f4,f5,f6
1,M61140660,ACH,maxlifeinsurance,978958600,5091047448,,
24,M62347463,ACH,indiafirstlifeinsurance,869552456,9625200416,,
29,M84994153,ACH,maxlifeinsurance,938423614,2163067846,,
54,M05551691,ACH,indiafirstlifeinsurance,473998381,4963666525,,
61,M18181002,ACH,maxlifeinsurance,857650067,5136262115,,
89,M70623062,ACH,indiafirstlifeinsurance,546626237,6114074043,,
95,M25387717,ACH,maxlifeinsurance,559613218,17809020,,
123,M31887652,ACH,indiafirstlifeinsurance,163096618,6192596030,,
128,M66399529,ACH,maxlifeinsurance,727153363,6831041660,,
155,M53483768,ACH,indiafirstlifeinsurance,416252480,6734427982,,


In [24]:
mmt_df

Unnamed: 0,txnId,f1,f2,f3,f4,f5,f6
4,M92803625,MMT,IMPS,974561764045,pnbpersonalloan,axis,SIB
7,M51185347,MMT,IMPS,142088569622,idbihomeloan,axis,SBIN
8,M13469397,MMT,IMPS,215925743911,rent,axis,BLR
26,M67511010,MMT,IMPS,554837900296,axisprivilegeccpmt,axis,SBIN
27,M63275494,MMT,IMPS,667892428773,sbieliteccpmt,axis,SIB
30,M89625317,MMT,IMPS,621898646336,pnbpersonalloan,axis,KIN
36,M20968754,MMT,IMPS,686986102135,idbihomeloan,axis,HDF
37,M25256322,MMT,IMPS,656496155428,rent,axis,HDF
56,M54743307,MMT,IMPS,306261064501,axisprivilegeccpmt,axis,KIN
58,M06956732,MMT,IMPS,738607658838,sbieliteccpmt,axis,SIB


In [25]:
iin_df

Unnamed: 0,txnId,f1,f2,f3,f4,f5,f6
21,M87023371,IIN,I-Debit,netflix,580064550900,792760485026,
51,M17086932,IIN,I-Debit,netflix,688130038806,810514369875,
84,M30473719,IIN,I-Debit,netflix,343521665246,942681297082,
118,M56935008,IIN,I-Debit,netflix,607752171197,181343213782,
146,M22407868,IIN,I-Debit,smokehouse,7793953909,313085325916,
151,M61388786,IIN,I-Debit,netflix,632382289143,473936116989,
183,M32404613,IIN,I-Debit,netflix,293222628840,694587657517,
211,M57486019,IIN,I-Debit,netflix,79564662092,393924811095,
236,M16584796,IIN,I-Debit,a2b,686198708093,975630452009,
241,M22002991,IIN,I-Debit,netflix,431904858118,368446370980,


In [26]:
#Making separate dataframes for specific merchants: These are for the others
ips_df = narrations[narrations["f1"]=="IPS"]
vin_df = narrations[narrations["f1"]=="VIN"]
cms_df = narrations[narrations["f1"]=="CMS"]
bil_df = narrations[narrations["f1"]=="BIL"]
nfs_df = narrations[narrations["f1"]=="NFS"]
other_df = narrations[narrations["f1"]!="UPI"]
other_df = other_df[other_df["f1"]!="GIB"]
other_df = other_df[other_df["f1"]!="VPS"]
other_df = other_df[other_df["f1"]!="ATM"]
other_df = other_df[other_df["f1"]!="ACH"]
other_df = other_df[other_df["f1"]!="MMT"]
other_df = other_df[other_df["f1"]!="IIN"]
other_df = other_df[other_df["f1"]!="IPS"]
other_df = other_df[other_df["f1"]!="VIN"]
other_df = other_df[other_df["f1"]!="CMS"]
other_df = other_df[other_df["f1"]!="BIL"]
other_df = other_df[other_df["f1"]!="NFS"]

In [27]:
ips_df

Unnamed: 0,txnId,f1,f2,f3,f4,f5,f6
6,M96840288,IPS,spencers,810835017872,256145420421,BANGALORE,
17,M10964413,IPS,beijingbites,567919602787,844170544653,BANGALORE,
35,M84206974,IPS,spencers,406647577386,545422652701,BANGALORE,
46,M75304819,IPS,cleartrip,790544643026,362238608671,BANGALORE,
66,M20346811,IPS,pvr,543823345690,401719210041,BANGALORE,
77,M97559535,IPS,beijingbites,577588159319,51837902328,BANGALORE,
91,M20239982,IPS,dentistclinic,181685415908,94379781194,BANGALORE,
100,M23071749,IPS,spencers,162510161099,694264105775,BANGALORE,
112,M38359843,IPS,gilly's,4755484016,804567812082,BANGALORE,
133,M91823010,IPS,spencers,179940225750,930584721,BANGALORE,


In [28]:
vin_df

Unnamed: 0,txnId,f1,f2,f3,f4,f5,f6
9,M36739380,VIN,burgerking,457855649189,729428881661,,
12,M05303476,VIN,indianoil,939947558663,833821338689,,
20,M96827610,VIN,tatasky,233675787126,290168960433,,
38,M81510743,VIN,beijingbites,63839626305,308831654398,,
48,M06319035,VIN,bluo,819016286442,556322265141,,
50,M08863745,VIN,tatasky,742429291513,277375225434,,
69,M64876783,VIN,veromoda,438469260145,75455968558,,
70,M36591507,VIN,adidas,424140139412,511505038910,,
73,M81623105,VIN,indianoil,712511156487,814058572983,,
79,M55699283,VIN,nolimmits,204185193736,74516582273,,


In [29]:
cms_df

Unnamed: 0,txnId,f1,f2,f3,f4,f5,f6


In [30]:
bil_df

Unnamed: 0,txnId,f1,f2,f3,f4,f5,f6
2,M15100170,BIL,BPAY,432994770625,motilaloswal,1771516350,
3,M42486489,BIL,INF,282011610769,principalmutualfund,5014687117,
31,M02157109,BIL,ONL,30734384765,motilaloswal,5157797573,
32,M09669801,BIL,INF,660869989301,principalmutualfund,2623555986,
39,M37054836,BIL,ONL,830440498368,994302139330,2953162090,
63,M29166520,BIL,BPAY,368847478836,motilaloswal,8748847068,
64,M90755282,BIL,ONL,577405250429,principalmutualfund,2344847265,
88,M50781402,BIL,BPAY,896255156419,233488841023,7993463050,
97,M40394623,BIL,INF,555804435355,motilaloswal,6669052138,
98,M59473571,BIL,INF,62912918565,principalmutualfund,4885673539,


In [31]:
nfs_df

Unnamed: 0,txnId,f1,f2,f3,f4,f5,f6


In [32]:
other_df

Unnamed: 0,txnId,f1,f2,f3,f4,f5,f6
0,M63132270,salfintechproductsandsolutions August 2019,,,,,
28,M00513734,salfintechproductsandsolutions September 2019,,,,,
60,M26492653,salfintechproductsandsolutions October 2019,,,,,
94,M96712754,salfintechproductsandsolutions November 2019,,,,,
127,M34029349,salfintechproductsandsolutions December 2019,,,,,
159,M78612952,salfintechproductsandsolutions January 2020,,,,,
192,M54851744,salfintechproductsandsolutions February 2020,,,,,
220,M62445095,salfintechproductsandsolutions March 2020,,,,,
246,M76523797,salfintechproductsandsolutions April 2020,,,,,
274,M84377800,salfintechproductsandsolutions May 2020,,,,,


In [33]:
#This dictionary indicates the column name which gives us the insights about the brand/company/reciever of money/transaction etc (CMS: there is no description)
merchants_list = ["UPI","GIB","VPS","ATM","ACH","MMT","IIN","IPS","VIN","CMS","BIL","NFS","OTHER"]
merchants_df_dict = {"UPI":upi_df, "GIB":gib_df, "VPS":vps_df, "ATM":atm_df, "ACH":ach_df, "MMT":mmt_df, "IIN":iin_df, "IPS":ips_df, "VIN":vin_df, "CMS":cms_df, "BIL":bil_df, "NFS":nfs_df, "OTHER":other_df}
selector_dict = {"UPI":["f3", "f4"], "GIB":["f3"], "VPS":["f2"], "ATM":["f3"], "ACH":["f2"], "MMT":["f4"], "IIN":["f3"], "IPS":["f2"], "VIN":["f2"], "CMS":["f3"], "BIL":["f3","f4"], "NFS":["f3"], "OTHER":["f1"]}

In [34]:
#print(merchants_list)
#print(merchants_df_dict)
#print(selector_dict)

In [35]:
    brand_df = pd.read_csv("Brand_db.csv")
brand_df

Unnamed: 0,Id,Brand_name,Category,Sub_category
0,0,Airtel (Bharti Air),Bills,Phone Bill
1,1,Jio,Bills,Phone Bill
2,2,Amul,Foods and Drinks,Snacks
3,3,Spencers,Groceries,Supermart
4,4,Reliance Fresh,Groceries,Supermart
5,5,Kwality Bazaar,Groceries,Small Grocery Shop
6,6,Shopper Stop,Shopping,Clothing Center
7,7,Reliance Digital,Shopping,Electronic Center
8,8,Louis Philippe,Shopping,M Clothing Brand
9,9,Arrow,Shopping,M Clothing Brand


In [36]:
#Function to remove whitespace (called by simply_string)
def remove(string): 
    return string.replace(" ", "") 
#Function to bring entire string to lower case and remove any spaces
def simplify_string(string):
    string = remove(string)
    string = string.casefold()
    return string
#Function to parse UPI id
import re
def parse_upi(string):
    alphanumeric = ""
    pattern = '[0-9.]'
    string = re.sub(pattern,'',string)
    tup = string.partition('@')
    return tup[0]
    
def parse_cms(string):
    pattern = '[0-9_]'
    return re.sub(pattern,'',string)

In [37]:
#Function to find which transaction indicates which brand
'''
def detect_brands(merchant, merchant_df, brands):
    f = selector_dict[merchant]
    if merchant=="UPI":
        trans_data1= merchant_df[f[0]].apply(simplify_string)
        trans_data2= merchant_df[f[1]].apply(simplify_string)
        trans_data2=trans_data2.apply(parse_upi)
        trans_data =trans_data1.str.cat(trans_data2, sep =",")
    elif merchant=="CMS":
        trans_data= merchant_df[f[0]].apply(simplify_string)
        trans_data=trans_data.apply(parse_cms)
    elif merchant=="BIL":
        trans_data1= merchant_df[f[0]].apply(simplify_string)
        trans_data2= merchant_df[f[1]].apply(simplify_string)
        trans_data = trans_data1.str.cat(trans_data2, sep ="")
        
    else:
        trans_data= merchant_df[f[0]].apply(simplify_string)
    brands = brands.apply(simplify_string)
    #print("brands: \n", brands)
    #print("trans_summaries: \n", trans_data)
    print("The total transactions under ",merchant,": ",len(trans_data))
    matching_count = 0
    matched = 0
    if merchant!="UPI":
        for t in trans_data:
            matched = 0
            for b in brands:
                index1 = t.find(b)
                index2 = b.find(t)
            
                if index1!=-1: #and index2!=-1:
                    #print(t)
                    matched = 1
                    matching_count+=1
                    continue
                elif index2!=-1:
                    #print(t)
                    matched = 1
                    matching_count+=1
                    continue
            if matched == 0:
                print(t)
    
    if merchant=="UPI":
        print("Trying with UPI ID")
        for t in trans_data:
            tup1= t.partition(',')
            t1 = tup1[0]
            t2 = tup1[2]
            matched = 0
            for b in brands:
                index1 = t1.find(b)
                index2 = b.find(t1)
            
                if index1!=-1: #and index2!=-1:
                    #print(t)
                    matched = 1
                    matching_count+=1
                    continue
                elif index2!=-1:
                    #print(t)
                    matched = 1
                    matching_count+=1
                    continue
            if matched == 0:
                print ("Matching no. 2")
                #print("t2: ",t2)
                for b in brands:
                    index1 = t2.find(b)
                    index2 = b.find(t2)
            
                    if index1!=-1: #and index2!=-1:
                        #print(t)
                        matched = 1
                        matching_count+=1
                        continue
                    elif index2!=-1:
                        #print(t)
                        matched = 1
                        matching_count+=1
                        continue
            if matched == 0:
                print("Not found: ",tup1)
                                 
    print("Matched brands: ",matching_count)
    '''

'\ndef detect_brands(merchant, merchant_df, brands):\n    f = selector_dict[merchant]\n    if merchant=="UPI":\n        trans_data1= merchant_df[f[0]].apply(simplify_string)\n        trans_data2= merchant_df[f[1]].apply(simplify_string)\n        trans_data2=trans_data2.apply(parse_upi)\n        trans_data =trans_data1.str.cat(trans_data2, sep =",")\n    elif merchant=="CMS":\n        trans_data= merchant_df[f[0]].apply(simplify_string)\n        trans_data=trans_data.apply(parse_cms)\n    elif merchant=="BIL":\n        trans_data1= merchant_df[f[0]].apply(simplify_string)\n        trans_data2= merchant_df[f[1]].apply(simplify_string)\n        trans_data = trans_data1.str.cat(trans_data2, sep ="")\n        \n    else:\n        trans_data= merchant_df[f[0]].apply(simplify_string)\n    brands = brands.apply(simplify_string)\n    #print("brands: \n", brands)\n    #print("trans_summaries: \n", trans_data)\n    print("The total transactions under ",merchant,": ",len(trans_data))\n    matching

In [38]:
#detect_brands(merchants_list[0],merchants_df_dict[merchants_list[0]], brands)
#print (merchants_list[9]) #,merchants_df_dict[merchants_list[0]]

In [39]:
type(df[df["narration"]=="SAL FINTECH PRODUCTS and SOLUTIONS DEC 2019"]["txnId"])

pandas.core.series.Series

In [40]:
#df.loc[df['txnId']=='M3258741','mode'].values[0]

In [41]:
#type(df["txnId"][0])

In [42]:
output_df = df.copy()

In [43]:
output_df.drop(["transactionTimestamp", "narration", "reference"],axis = 1, inplace = True)

In [44]:
output_df

Unnamed: 0,txnId,mode,amount,balance,type,valueDate
0,M63132270,FT,150000,539769,CREDIT,2019-08-01
1,M61140660,FT,3567,536202,DEBIT,2019-08-01
2,M15100170,FT,5000,531202,DEBIT,2019-08-01
3,M42486489,FT,9000,522202,DEBIT,2019-08-01
4,M92803625,FT,11345,510857,DEBIT,2019-08-01
5,M63655895,FT,50,510807,DEBIT,2019-08-02
6,M96840288,FT,2231,508576,DEBIT,2019-08-03
7,M51185347,FT,14583,493993,DEBIT,2019-08-03
8,M13469397,FT,12000,481993,DEBIT,2019-08-04
9,M36739380,FT,242,481751,DEBIT,2019-08-04


In [45]:
#output_df.at[df["txnId"]=="M3258741"]["brand"] = "yash"

In [46]:
#To find the row index where the some condtion is met
#index = output_df.index[output_df['txnId']=="M6221730"].tolist()

In [47]:
#To assign value to new cell of dataframe using row index and column name
#output_df.at[index[0], "brand"] = "Nike"

In [48]:
output_df["brand"] = ""

In [49]:
output_df["category"] = ""

In [50]:
output_df

Unnamed: 0,txnId,mode,amount,balance,type,valueDate,brand,category
0,M63132270,FT,150000,539769,CREDIT,2019-08-01,,
1,M61140660,FT,3567,536202,DEBIT,2019-08-01,,
2,M15100170,FT,5000,531202,DEBIT,2019-08-01,,
3,M42486489,FT,9000,522202,DEBIT,2019-08-01,,
4,M92803625,FT,11345,510857,DEBIT,2019-08-01,,
5,M63655895,FT,50,510807,DEBIT,2019-08-02,,
6,M96840288,FT,2231,508576,DEBIT,2019-08-03,,
7,M51185347,FT,14583,493993,DEBIT,2019-08-03,,
8,M13469397,FT,12000,481993,DEBIT,2019-08-04,,
9,M36739380,FT,242,481751,DEBIT,2019-08-04,,


In [51]:
def detect_brands(merchant, merchant_df, brand_df,output_df):
    output_df = output_df.copy()
    brand_df = brand_df.copy()
    merchant_df = merchant_df.copy()
    f = selector_dict[merchant]
    
    if merchant=="UPI":
        merchant_df[f[0]] = merchant_df[f[0]].apply(simplify_string)
        merchant_df[f[1]] = merchant_df[f[1]].apply(simplify_string)
        merchant_df[f[1]] = merchant_df[f[1]].apply(parse_upi)
        merchant_df[f[0]] = merchant_df[f[0]].str.cat(merchant_df[f[1]], sep =",")
        #print(merchant_df.head())
    elif merchant=="CMS":
        merchant_df[f[0]] = merchant_df[f[0]].apply(simplify_string)
        merchant_df[f[0]] = merchant_df[f[0]].apply(parse_cms)
        
    elif merchant=="BIL":
        merchant_df[f[0]] = merchant_df[f[0]].apply(simplify_string)
        merchant_df[f[1]] = merchant_df[f[1]].apply(simplify_string)
        merchant_df[f[0]] = merchant_df[f[0]] .str.cat(merchant_df[f[1]], sep ="")
        
    else:
        merchant_df[f[0]] = merchant_df[f[0]].apply(simplify_string)
        
    brand_df['parsed'] = brand_df['Brand_name'].apply(simplify_string)
    
    #print("brands: \n", brands)
    #print("trans_summaries: \n", trans_data)
    #print("The total transactions under ",merchant,": ",len(merchant_df[f[0]]))
    matching_count = 0
    matched = 0
    if merchant!="UPI":
       # print("Doing for: ",merchant)
        for index_t, row_t in merchant_df.iterrows():
            txnId = row_t['txnId']
            t = row_t[f[0]]
            matched = 0
            for index_b, row_b in brand_df.iterrows():
                b = row_b["parsed"]
                index1 = t.find(b)
                index2 = b.find(t)
            
                if index1!=-1: #and index2!=-1:
                    #print(t)
                    matched = 1
                    matched_brand = row_b["Brand_name"]
                    matched_category = row_b["Category"]
                    matching_count+=1
                    continue
                elif index2!=-1:
                    #print(t)
                    matched = 1
                    matched_brand = row_b["Brand_name"]
                    matched_category = row_b["Category"]
                    matching_count+=1
                    continue
            if matched == 0:
               # print("Not found: ",t)
                matched_brand, matched_category = "Unknown", "Other"
         #   else:
             #   print("TxnId: ", txnId, "  Brand Name: ", matched_brand, "  Category: ",matched_category)
            
            index = output_df.index[output_df['txnId']==txnId].tolist()
            output_df.at[index[0], "brand"] = matched_brand
            output_df.at[index[0], "category"] = matched_category
    
    if merchant=="UPI":
       # print("Doing for: ",merchant)
        for index_t, row_t in merchant_df.iterrows():
            txnId = row_t['txnId']
            #print(txnId)
            tup1= row_t[f[0]].partition(',')
            t1 = tup1[0]
            t2 = tup1[2]
            matched = 0
            for index_b, row_b in brand_df.iterrows():
                b = row_b["parsed"]
                index1 = t1.find(b)
                index2 = b.find(t1)
            
                if index1!=-1: #and index2!=-1:
                    #print(t)
                    matched = 1
                    matched_brand = row_b["Brand_name"]
                    matched_category = row_b["Category"]
                    matching_count+=1
                    continue
                elif index2!=-1:
                    #print(t)
                    matched = 1
                    matched_brand = row_b["Brand_name"]
                    matched_category = row_b["Category"]
                    matching_count+=1
                    continue
            if matched == 0:
              #  print ("Matching no. 2")
                #print("t2: ",t2)
                for index_b, row_b in brand_df.iterrows():
                    b = row_b["parsed"]
                    index1 = t2.find(b)
                    index2 = b.find(t2)
            
                    if index1!=-1: #and index2!=-1:
                        #print(t)
                        matched = 1
                        matched_brand = row_b["Brand_name"]
                        matched_category = row_b["Category"]
                        matching_count+=1
                        continue
                    elif index2!=-1:
                        #print(t)
                        matched = 1
                        matched_brand = row_b["Brand_name"]
                        matched_category = row_b["Category"]
                        matching_count+=1
                        continue
                
            if matched == 0:
              #  print("Not found: ",tup1)
                matched_brand, matched_category = t2, "Transfer"
          #  else:
            #    print("TxnId: ", txnId, "  Brand Name: ", matched_brand, "  Category: ",matched_category)
            
            index = output_df.index[output_df['txnId']==txnId].tolist()
            output_df.at[index[0], "brand"] = matched_brand
            output_df.at[index[0], "category"] = matched_category
   # print("Matched brands: ",matching_count)
    return output_df

In [52]:
for i in range(len(merchants_list)):
    output_df = detect_brands(merchants_list[i],merchants_df_dict[merchants_list[i]], brand_df, output_df)

In [53]:
output_df

Unnamed: 0,txnId,mode,amount,balance,type,valueDate,brand,category
0,M63132270,FT,150000,539769,CREDIT,2019-08-01,SAL FINTECH PRODUCTS and SOLUTIONS,Other
1,M61140660,FT,3567,536202,DEBIT,2019-08-01,Max Life Insurance,Insurance
2,M15100170,FT,5000,531202,DEBIT,2019-08-01,Motilal Oswal,Investment
3,M42486489,FT,9000,522202,DEBIT,2019-08-01,Principal Mutual Fund,Investment
4,M92803625,FT,11345,510857,DEBIT,2019-08-01,PNB Personal Loan,Loan (EMI)
5,M63655895,FT,50,510807,DEBIT,2019-08-02,Amul,Foods and Drinks
6,M96840288,FT,2231,508576,DEBIT,2019-08-03,Spencers,Groceries
7,M51185347,FT,14583,493993,DEBIT,2019-08-03,IDBI Home Loan,Loan (EMI)
8,M13469397,FT,12000,481993,DEBIT,2019-08-04,Rent,Rent
9,M36739380,FT,242,481751,DEBIT,2019-08-04,Burger King,Foods and Drinks


In [54]:
output_df.to_json("../web-app/server/src/allTransactions.json", orient = 'records')

In [55]:
output_df['valueDate'] = pd.to_datetime(output_df['valueDate'], format='%Y-%m-%d')

In [56]:
#Defining dictionary for identifying days
days_dict = {0: "Monday", 1: "Tuesday", 2: "Wednesday", 3: "Thurday", 4:"Friday", 5: "Saturday", 6: "Sunday"}
months_dict = {1: "January", 2: "February", 3: "March", 4: "April", 5:"May", 6: "June", 7: "July", 8: "August", 9: "September", 10: "October", 11: "November", 12: "December"}

In [57]:
months_dict[output_df.at[50,'valueDate'].month]

'September'

In [58]:
month_wise_exp = pd.DataFrame({"Month":[months_dict[i] for i in months_dict.keys()],"Total_Expense":0, "Total_Income":0})

In [59]:
month_wise_exp

Unnamed: 0,Month,Total_Expense,Total_Income
0,January,0,0
1,February,0,0
2,March,0,0
3,April,0,0
4,May,0,0
5,June,0,0
6,July,0,0
7,August,0,0
8,September,0,0
9,October,0,0


In [60]:
def get_monthly_exp(df, month_wise_exp):
    df = df.copy()
    month_wise_exp = month_wise_exp.copy()
    for index_1, row_1 in df.iterrows():
            date = row_1['valueDate']
            type_trans = row_1['type']
            amnt = row_1["amount"]
            m = date.month
            m = months_dict[m]
            for index_2, row_2 in month_wise_exp.iterrows():
                mon = row_2['Month']
                if mon == m:
                    ind = month_wise_exp.index[month_wise_exp['Month']==mon].tolist()
                    if type_trans == "DEBIT":
                        month_wise_exp.at[ind[0], "Total_Expense"] += amnt  
                    if type_trans == "CREDIT":
                        month_wise_exp.at[ind[0], "Total_Income"] += amnt  
                    continue
    return month_wise_exp
    

In [61]:
month_wise_exp = get_monthly_exp(output_df, month_wise_exp)

In [62]:
month_wise_exp

Unnamed: 0,Month,Total_Expense,Total_Income
0,January,160408,152608
1,February,118319,150000
2,March,125758,153006
3,April,128527,152016
4,May,140680,151540
5,June,214027,155102
6,July,127550,153123
7,August,115569,150000
8,September,136894,153500
9,October,138190,152021


In [63]:
month_wise_exp.to_json("../web-app/server/src/monthWiseEI.json", orient = 'records')

In [64]:
opening_closing = pd.DataFrame({"Month":[months_dict[i] for i in months_dict.keys()], "Opening_Balance":0, "Closing_Balance":0})

In [65]:
opening_closing

Unnamed: 0,Month,Opening_Balance,Closing_Balance
0,January,0,0
1,February,0,0
2,March,0,0
3,April,0,0
4,May,0,0
5,June,0,0
6,July,0,0
7,August,0,0
8,September,0,0
9,October,0,0


In [66]:
def get_opening_closing(df, opening_closing):
    df = df.copy()
    opening_closing = opening_closing.copy()
    for index_1, row_1 in opening_closing.iterrows():
                mon = row_1['Month']
                for index_2, row_2 in df.iterrows():
                    date = row_2['valueDate']
                    type_trans = row_2['type']
                    bal = row_2["balance"]
                    amnt = row_2["amount"]
                    m = date.month
                    m = months_dict[m]
                    if m == mon:
                        ind = opening_closing.index[opening_closing['Month']==mon].tolist()
                        if type_trans == "DEBIT":
                            opening_closing.at[ind[0], "Opening_Balance"] = bal + amnt
                        else:
                            opening_closing.at[ind[0], "Opening_Balance"] = bal - amnt
                        break
    
    for index_1, row_1 in opening_closing.iterrows():
        mon = row_1['Month']
        opening = row_1['Opening_Balance']
        ind = opening_closing.index[opening_closing['Month']==mon].tolist()
        #print (ind)
        if ind[0] == 0:
                opening_closing.at[11, "Closing_Balance"] = opening
        else:
            opening_closing.at[ind[0]-1, "Closing_Balance"] = opening
    return opening_closing            

In [67]:
opening_closing = get_opening_closing(output_df, opening_closing)
opening_closing

Unnamed: 0,Month,Opening_Balance,Closing_Balance
0,January,511731,503931
1,February,503931,535612
2,March,535612,562860
3,April,562860,586349
4,May,586349,597209
5,June,597209,538284
6,July,538284,389769
7,August,389769,424200
8,September,424200,440806
9,October,440806,454637


In [68]:
opening_closing.to_json("../web-app/server/src/openingClosing.json", orient = 'records')

In [69]:
category_list = brand_df["Category"].unique()
category_list.sort()
category_list

array(['Bills', 'Cash Withdrawal', 'Courier', 'Credit Card',
       'Entertainment', 'Foods and Drinks', 'Fuel', 'Groceries', 'Health',
       'Insurance', 'Investment', 'Loan (EMI)', 'Other', 'Personal Care',
       'Rent', 'Shopping', 'Transfer', 'Travel'], dtype=object)

In [70]:
category_wise_exp = pd.DataFrame({"Category": category_list, "Total_Expense":0})
category_wise_exp

Unnamed: 0,Category,Total_Expense
0,Bills,0
1,Cash Withdrawal,0
2,Courier,0
3,Credit Card,0
4,Entertainment,0
5,Foods and Drinks,0
6,Fuel,0
7,Groceries,0
8,Health,0
9,Insurance,0


In [71]:
def get_category_exp(df, category_wise_exp):
    df = df.copy()
    category_wise_exp = category_wise_exp.copy()
    for index_1, row_1 in df.iterrows():
            cat = row_1['category']
            type_trans = row_1['type']
            amnt = row_1["amount"]
            for index_2, row_2 in category_wise_exp.iterrows():
                c = row_2['Category']
                if cat == c:
                    ind = category_wise_exp.index[category_wise_exp['Category']==cat].tolist()
                    if type_trans == "DEBIT":
                        category_wise_exp.at[ind[0], "Total_Expense"] += amnt 
                    break
    return category_wise_exp

In [72]:
category_wise_exp = get_category_exp(output_df, category_wise_exp)
category_wise_exp

Unnamed: 0,Category,Total_Expense
0,Bills,37961
1,Cash Withdrawal,24000
2,Courier,0
3,Credit Card,486564
4,Entertainment,15710
5,Foods and Drinks,57449
6,Fuel,22029
7,Groceries,29925
8,Health,10429
9,Insurance,105216


In [73]:
category_wise_exp.to_json("../web-app/server/src/categoryWiseYearly.json", orient = 'records')

In [74]:
def Convert(lst): 
    li = list(months_dict.values())
    res_dct = {"Month": li} 
    res_dct.update({lst[i]: [0]*12  for i in range(0, len(lst))})
    res_dct.update({'Total_Expense' : [0]*12})
    return res_dct
res_dict = Convert(category_list)
res_dict

{'Month': ['January',
  'February',
  'March',
  'April',
  'May',
  'June',
  'July',
  'August',
  'September',
  'October',
  'November',
  'December'],
 'Bills': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Cash Withdrawal': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Courier': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Credit Card': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Entertainment': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Foods and Drinks': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Fuel': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Groceries': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Health': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Insurance': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Investment': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Loan (EMI)': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Other': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Personal Care': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Rent': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Shopping': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 'Transfer'

In [75]:
category_month_wise_exp = pd.DataFrame(res_dict)
category_month_wise_exp

Unnamed: 0,Month,Bills,Cash Withdrawal,Courier,Credit Card,Entertainment,Foods and Drinks,Fuel,Groceries,Health,Insurance,Investment,Loan (EMI),Other,Personal Care,Rent,Shopping,Transfer,Travel,Total_Expense
0,January,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,February,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,March,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,April,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,May,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,June,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,July,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,August,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,September,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,October,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [76]:
def get_month_wise_category(df, category_month_wise_exp):
    df = df.copy()
    category_month_wise_exp = category_month_wise_exp.copy()
    for index_1, row_1 in df.iterrows():
            date = row_1['valueDate']
            mon_no = date.month
            type_trans = row_1['type']
            mon = months_dict[mon_no]
            amnt = row_1["amount"]
            category = row_1["category"]
            for index_2, row_2 in category_month_wise_exp.iterrows():
                m = row_2['Month']
                if mon == m:
                    ind = category_month_wise_exp.index[category_month_wise_exp['Month']==mon].tolist()
                    if type_trans == "DEBIT":
                        category_month_wise_exp.at[ind[0],category ] += amnt 
    return category_month_wise_exp                
            

In [77]:
category_month_wise_exp = get_month_wise_category(output_df, category_month_wise_exp)
category_month_wise_exp["Total_Expense"] = month_wise_exp["Total_Expense"]
category_month_wise_exp

Unnamed: 0,Month,Bills,Cash Withdrawal,Courier,Credit Card,Entertainment,Foods and Drinks,Fuel,Groceries,Health,Insurance,Investment,Loan (EMI),Other,Personal Care,Rent,Shopping,Transfer,Travel,Total_Expense
0,January,3029,0,0,40547,988,12751,0,2338,1900,8768,14000,25928,0,0,12000,16431,0,21728,160408
1,February,3428,0,0,40547,799,2580,2500,4399,2060,8768,14000,25928,0,0,12000,0,670,640,118319
2,March,3210,0,0,40547,799,972,1500,1565,220,8768,14000,25928,0,850,12000,15399,0,0,125758
3,April,2529,0,0,40547,1298,1717,2000,1983,0,8768,14000,25928,3500,0,12000,12908,0,1349,128527
4,May,3328,4500,0,40547,1098,12487,1500,3389,0,8768,14000,25928,0,799,12000,11492,0,844,140680
5,June,3129,2000,0,40547,799,2799,0,1259,0,8768,14000,25928,0,0,12000,102678,0,120,214027
6,July,3191,5000,0,40547,1249,2262,2400,1719,0,8768,14000,25928,0,0,12000,10486,0,0,127550
7,August,3428,0,0,40547,1098,1732,1500,2530,419,8768,14000,25928,0,0,12000,3289,0,330,115569
8,September,3329,10000,0,40547,3057,1997,0,2390,0,8768,14000,25928,0,150,12000,205,0,14523,136894
9,October,2789,0,0,40547,1658,9349,4700,1200,1900,8768,14000,25928,0,0,12000,14241,0,1110,138190


In [78]:
category_month_wise_exp.to_json("../web-app/server/src/categoryMonthWise.json", orient = 'records')

In [79]:
def get_current_balance(df):
    df = df.copy()
    '''
    y = datetime.now().year
    m = datetime.now().month
    d = datetime.now().day
    '''
    y = 2020
    m = 6
    d = 22
    dif = 31
    act_date = d
    for index_1, row_1 in df.iterrows():
        date = row_1['valueDate']
        year = date.year
        mon = date.month
        day = date.day
        if year == y and mon == m:
            #print (day)
            dif_cur = abs(d - day)
            dif = min(dif, dif_cur)
    #print(dif)
    act_date = d - dif
    for index_1, row_1 in df.iterrows():
        bal = row_1['balance']
        date = row_1['valueDate']
        year = date.year
        mon = date.month
        day = date.day
        #print (d)
        if year == y and mon == m and act_date == day:
            #print (d)
            return (round(bal,0), str(y)+"-"+str(m)+"-"+str(d))

In [80]:
current_balance, date = get_current_balance(output_df)
current_balance, date
cur_balance_df = pd.DataFrame({"date":[date], "balance":[current_balance]})
cur_balance_df

Unnamed: 0,date,balance
0,2020-6-22,584091


In [81]:
cur_balance_df.to_json("../web-app/server/src/currentBalance.json", orient = 'records')

In [82]:
def get_month_wise_transaction(df, month):
    df = df.copy()
    df["month"] = ""
    for index_1, row_1 in df.iterrows():
        date = row_1['valueDate']
        txnId = row_1['txnId']
        mon_no = date.month
        mon = months_dict[mon_no]
        ind = df.index[df['txnId']==txnId].tolist()
        df.at[ind[0],"month"] = mon 
    df = df[df['month'] == month] 
    return df
    

In [83]:
month_wise_transactions = pd.DataFrame()
for mon in months_dict.values():
    month_wise_transac = get_month_wise_transaction(output_df, mon)
    month_wise_transactions = pd.concat([month_wise_transactions, month_wise_transac], axis = 0, ignore_index= True)

In [84]:
def to_str_date(x):
    x = str(x)[:10]
    return x
month_wise_transactions["valueDate"] =month_wise_transactions["valueDate"].apply(to_str_date)

In [85]:
for i in range(month_wise_transactions.shape[0]):
    #print (month_wise_transactions["valueDate"][i])
    da = int(month_wise_transactions["valueDate"][i][-2:])
    mo = int(month_wise_transactions["valueDate"][i][5:7])
    mon = months_dict[mo]
    month_wise_transactions.at[i,"valueDate"] = str(da) + " " + mon

In [86]:
month_wise_transactions

Unnamed: 0,txnId,mode,amount,balance,type,valueDate,brand,category,month
0,M78612952,FT,150000,661731,CREDIT,1 January,SAL FINTECH PRODUCTS and SOLUTIONS,Other,January
1,M45150556,FT,3567,658164,DEBIT,1 January,Max Life Insurance,Insurance,January
2,M64460715,FT,11345,646819,DEBIT,1 January,PNB Personal Loan,Loan (EMI),January
3,M14574064,FT,5000,641819,DEBIT,1 January,Motilal Oswal,Investment,January
4,M90067616,FT,9000,632819,DEBIT,1 January,Principal Mutual Fund,Investment,January
5,M15856766,FT,1209,631610,DEBIT,2 January,KFC,Foods and Drinks,January
6,M60062919,FT,1939,629671,DEBIT,3 January,Spencers,Groceries,January
7,M13820349,FT,14583,615088,DEBIT,3 January,IDBI Home Loan,Loan (EMI),January
8,M42600137,FT,607,615695,CREDIT,4 January,Unknown,Other,January
9,M54340314,FT,12000,603695,DEBIT,4 January,Rent,Rent,January


In [87]:
month_wise_transactions.to_json("../web-app/server/src/monthWiseTransactions.json", orient = 'records')

In [88]:
def get_category_wise_transaction(df, category):
    df = df.copy()
    df = df[df['category']==category]
    return df

In [89]:
category_wise_transactions = pd.DataFrame()
for cat in category_list:
    cat_wise_transac = get_category_wise_transaction(output_df, cat)
    category_wise_transactions = pd.concat([category_wise_transactions, cat_wise_transac], axis = 0, ignore_index= True)
category_wise_transactions


Unnamed: 0,txnId,mode,amount,balance,type,valueDate,brand,category
0,M34495073,FT,1380,480041,DEBIT,2019-08-05,BESCOM,Bills
1,M78743336,FT,1299,477042,DEBIT,2019-08-09,Ion Broadband,Bills
2,M35768082,FT,399,476643,DEBIT,2019-08-10,Jio,Bills
3,M96827610,FT,350,471295,DEBIT,2019-08-18,Tatasky,Bills
4,M76267606,FT,1680,517859,DEBIT,2019-09-05,BESCOM,Bills
5,M98035297,FT,1299,515921,DEBIT,2019-09-09,Ion Broadband,Bills
6,M08863745,FT,350,498788,DEBIT,2019-09-18,Tatasky,Bills
7,M77255052,FT,1140,521771,DEBIT,2019-10-05,BESCOM,Bills
8,M55411983,FT,1299,517272,DEBIT,2019-10-10,Ion Broadband,Bills
9,M12997721,FT,350,506333,DEBIT,2019-10-18,Tatasky,Bills


In [90]:
category_wise_transactions.to_json("../web-app/server/src/categoryWiseTransactions.json", orient = 'records')

In [91]:
brands_list = output_df["brand"].unique().tolist()
brands_list

['SAL FINTECH PRODUCTS and SOLUTIONS',
 'Max Life Insurance',
 'Motilal Oswal ',
 'Principal Mutual Fund',
 'PNB Personal Loan',
 'Amul',
 'Spencers',
 ' IDBI Home Loan',
 'Rent',
 'Burger King',
 'Uber',
 'BESCOM',
 'Indian Oil',
 'Kwality Bazaar ',
 'Ion Broadband',
 'Jio',
 'Veromoda ',
 'Beijing Bites',
 '1mg',
 'Tatasky',
 'Netflix',
 'PVR',
 'IndiaFirst Life Insurance',
 'Axis Privilege CC PMT',
 'SBI Elite CC PMT',
 'SK Rolls',
 'Unknown',
 'Eat Fit',
 'Clear Trip',
 'BBlunt',
 'Bluo',
 'Chai Point',
 'Amazon (AMZN)',
 'Cash Withdrawal (cashwdl)',
 'KFC',
 'Adidas',
 'Sugar',
 'No Limmits',
 'Swiggy',
 'Decathlon',
 'Dentist Clinic',
 "Gilly's",
 'Ori Flame',
 'Tonique',
 'cafecofffeeday',
 'Smoke House',
 'Hairspeak',
 'Zomato',
 'H&M',
 'Catwalk',
 'American Tourister',
 'Stoned Monkey',
 'cafecofeeday',
 'Big Basket',
 'Leon Grill',
 'Sri Ganesh Medicals',
 'A2B',
 'Myntra',
 "Mr Goofy's",
 'Sky Bags',
 'Titan Eye Plus',
 'Only',
 'Land Mark',
 'IRCTC',
 'Bangalore Central',


In [92]:
brand, amount = [],[]
invalid_categories = ["Bills", "Cash Withdrawal", "Health", "Insurance", "Investment", "Loan (EMI)", "Other", "Rent", "Transfer", "Credit Card"]
def get_brand_wise(output_df):
    output_df = output_df.copy()
    for b in brands_list:
        ind = output_df.index[output_df['brand']==b].tolist()
        t_type = output_df.at[ind[0],"type"]
        cat = output_df.at[ind[0],"category"]
        b_amount = 0
        if t_type == "DEBIT" and cat not in invalid_categories:
            for i in ind:
                amnt = output_df.at[i, "amount"]
                b_amount += amnt
            brand.append(b)
            amount.append(b_amount)              

In [93]:
get_brand_wise(output_df)
brand, amount

(['Amul',
  'Spencers',
  'Burger King',
  'Uber',
  'Indian Oil',
  'Kwality Bazaar ',
  'Veromoda ',
  'Beijing Bites',
  'Netflix',
  'PVR',
  'SK Rolls',
  'Eat Fit',
  'Clear Trip',
  'BBlunt',
  'Bluo',
  'Chai Point',
  'Amazon (AMZN)',
  'KFC',
  'Adidas',
  'Sugar',
  'No Limmits',
  'Swiggy',
  'Decathlon',
  "Gilly's",
  'Ori Flame',
  'Tonique',
  'Smoke House',
  'Hairspeak',
  'Zomato',
  'H&M',
  'Catwalk',
  'American Tourister',
  'Stoned Monkey',
  'Big Basket',
  'Leon Grill',
  'A2B',
  'Myntra',
  "Mr Goofy's",
  'Sky Bags',
  'Titan Eye Plus',
  'Only',
  'Land Mark',
  'IRCTC',
  'Bangalore Central',
  'iPlanet',
  'Plated',
  'Croma'],
 [3300,
  21903,
  726,
  5925,
  22029,
  5286,
  13672,
  4308,
  9588,
  4722,
  80,
  1556,
  34602,
  2598,
  1400,
  419,
  15644,
  5408,
  17499,
  1240,
  15071,
  270,
  2001,
  16499,
  2019,
  3500,
  743,
  399,
  1757,
  15987,
  1999,
  4999,
  480,
  2736,
  242,
  310,
  1400,
  670,
  1299,
  7999,
  2100,
  8188

In [94]:
amount, brand = zip(*sorted(zip(amount, brand), reverse =True))

In [95]:
amount, brand

((89000,
  34602,
  22029,
  21903,
  17499,
  16499,
  15987,
  15644,
  15071,
  13672,
  9588,
  8502,
  8188,
  7999,
  5925,
  5408,
  5286,
  4999,
  4722,
  4308,
  3500,
  3300,
  2736,
  2598,
  2110,
  2100,
  2019,
  2001,
  1999,
  1757,
  1556,
  1400,
  1400,
  1299,
  1240,
  789,
  743,
  726,
  670,
  599,
  480,
  419,
  399,
  310,
  270,
  242,
  80),
 ('iPlanet',
  'Clear Trip',
  'Indian Oil',
  'Spencers',
  'Adidas',
  "Gilly's",
  'H&M',
  'Amazon (AMZN)',
  'No Limmits',
  'Veromoda ',
  'Netflix',
  'Bangalore Central',
  'Land Mark',
  'Titan Eye Plus',
  'Uber',
  'KFC',
  'Kwality Bazaar ',
  'American Tourister',
  'PVR',
  'Beijing Bites',
  'Tonique',
  'Amul',
  'Big Basket',
  'BBlunt',
  'Plated',
  'Only',
  'Ori Flame',
  'Decathlon',
  'Catwalk',
  'Zomato',
  'Eat Fit',
  'Myntra',
  'Bluo',
  'Sky Bags',
  'Sugar',
  'IRCTC',
  'Smoke House',
  'Burger King',
  "Mr Goofy's",
  'Croma',
  'Stoned Monkey',
  'Chai Point',
  'Hairspeak',
  'A2B',
 

In [96]:
brandWise = pd.DataFrame({"brand": brand, "amount": amount})
brandWise

Unnamed: 0,brand,amount
0,iPlanet,89000
1,Clear Trip,34602
2,Indian Oil,22029
3,Spencers,21903
4,Adidas,17499
5,Gilly's,16499
6,H&M,15987
7,Amazon (AMZN),15644
8,No Limmits,15071
9,Veromoda,13672


In [97]:
brandWise.to_json("../web-app/server/src/brandWise.json", orient = 'records')

In [98]:
brandWiseOther = brandWise[:7]
others = brandWise[7:]
others = others.reset_index()
oth_amnt = 0
for i in range(others.shape[0]):
    oth_amnt += int(others["amount"][i])

In [99]:
brandWiseOther = brandWiseOther.append({"brand":"Others", "amount": oth_amnt},ignore_index= True)

In [100]:
brandWiseOther.to_json("../web-app/server/src/brandWiseOther.json", orient = 'records')

In [101]:
category_list

array(['Bills', 'Cash Withdrawal', 'Courier', 'Credit Card',
       'Entertainment', 'Foods and Drinks', 'Fuel', 'Groceries', 'Health',
       'Insurance', 'Investment', 'Loan (EMI)', 'Other', 'Personal Care',
       'Rent', 'Shopping', 'Transfer', 'Travel'], dtype=object)

In [102]:
month_wise_transactions

Unnamed: 0,txnId,mode,amount,balance,type,valueDate,brand,category,month
0,M78612952,FT,150000,661731,CREDIT,1 January,SAL FINTECH PRODUCTS and SOLUTIONS,Other,January
1,M45150556,FT,3567,658164,DEBIT,1 January,Max Life Insurance,Insurance,January
2,M64460715,FT,11345,646819,DEBIT,1 January,PNB Personal Loan,Loan (EMI),January
3,M14574064,FT,5000,641819,DEBIT,1 January,Motilal Oswal,Investment,January
4,M90067616,FT,9000,632819,DEBIT,1 January,Principal Mutual Fund,Investment,January
5,M15856766,FT,1209,631610,DEBIT,2 January,KFC,Foods and Drinks,January
6,M60062919,FT,1939,629671,DEBIT,3 January,Spencers,Groceries,January
7,M13820349,FT,14583,615088,DEBIT,3 January,IDBI Home Loan,Loan (EMI),January
8,M42600137,FT,607,615695,CREDIT,4 January,Unknown,Other,January
9,M54340314,FT,12000,603695,DEBIT,4 January,Rent,Rent,January


In [126]:
july_trans = month_wise_transactions[month_wise_transactions["month"] == "July"]
july_trans

Unnamed: 0,txnId,mode,amount,balance,type,valueDate,brand,category,month
175,M89863413,FT,150000,688284,CREDIT,1 July,SAL FINTECH PRODUCTS and SOLUTIONS,Other,July
176,M09870531,FT,3567,684717,DEBIT,1 July,Max Life Insurance,Insurance,July
177,M62891257,FT,11345,673372,DEBIT,1 July,PNB Personal Loan,Loan (EMI),July
178,M43579408,FT,5000,668372,DEBIT,1 July,Motilal Oswal,Investment,July
179,M69608046,FT,9000,659372,DEBIT,1 July,Principal Mutual Fund,Investment,July
180,M84606717,FT,230,659142,DEBIT,2 July,Amul,Foods and Drinks,July
181,M25849614,FT,1599,657543,DEBIT,3 July,Spencers,Groceries,July
182,M49644995,FT,14583,642960,DEBIT,3 July,IDBI Home Loan,Loan (EMI),July
183,M94955894,FT,12000,630960,DEBIT,4 July,Rent,Rent,July
184,M36395642,FT,242,630718,DEBIT,4 July,Burger King,Foods and Drinks,July


In [127]:
july_trans = july_trans[july_trans["type"]=="DEBIT"]
july_brands = july_trans["brand"].tolist()
july_brands = set(july_brands)
july_trans = july_trans.reset_index()
# MANUAL WORK: MAKE IT PROPER IF TIME
july_brands.remove(' IDBI Home Loan')
july_brands.remove('Axis Privilege CC PMT')
july_brands.remove('BESCOM')
july_brands.remove('Cash Withdrawal (cashwdl)')
july_brands.remove('IndiaFirst Life Insurance')
july_brands.remove('Indian Oil')
july_brands.remove('Ion Broadband')
july_brands.remove('Max Life Insurance')
july_brands.remove('Motilal Oswal ')
july_brands.remove('PNB Personal Loan')
july_brands.remove('Principal Mutual Fund')
july_brands.remove('Rent')
july_brands.remove('SBI Elite CC PMT')
july_brands.remove('Tatasky')
july_brands

{'Amul',
 'Bangalore Central',
 'Beijing Bites',
 'Burger King',
 'Eat Fit',
 'KFC',
 'Kwality Bazaar ',
 'Land Mark',
 'Netflix',
 'PVR',
 'Spencers',
 'Veromoda '}

In [128]:
br_dict = {}
for i in july_brands:
    br_dict[i] = 0

In [129]:
for i in range(july_trans.shape[0]):
   # print(i)
    brd = july_trans["brand"][i]
    #print(brd)
    amnt = july_trans["amount"][i]
    if brd in july_brands:
        br_dict[brd] += int(amnt)

In [130]:
br_dict

{'Veromoda ': 2889,
 'Amul': 280,
 'PVR': 450,
 'Eat Fit': 410,
 'Burger King': 242,
 'Kwality Bazaar ': 120,
 'Bangalore Central': 3999,
 'Netflix': 799,
 'Beijing Bites': 790,
 'KFC': 540,
 'Land Mark': 3598,
 'Spencers': 1599}

In [131]:
br_dict = sorted(br_dict.items(), key=lambda x: x[1], reverse=True)

In [132]:
brd_li = []
amnt_li = []
for i in br_dict:
    brd_li.append(i[0])
    amnt_li.append(i[1])

In [133]:
brandWiseMonth = pd.DataFrame({"brand":brd_li, "amount":amnt_li})

In [134]:
brandWiseMonth

Unnamed: 0,brand,amount
0,Bangalore Central,3999
1,Land Mark,3598
2,Veromoda,2889
3,Spencers,1599
4,Netflix,799
5,Beijing Bites,790
6,KFC,540
7,PVR,450
8,Eat Fit,410
9,Amul,280


In [135]:
brandWiseMonth.to_json("../web-app/server/src/brandWiseMonth.json", orient = 'records')

In [None]:
month_wise_transactions

In [None]:
month_li = list(months_dict.values())

In [None]:
mo_li = []
br_li = []
amt_li = [] 
da_li = []

In [None]:
avoid = ["SAL FINTECH PRODUCTS and SOLUTIONS","Axis Privilege CC PMT", "Max Life Insurance", "PNB Personal Loan", "Motilal Oswal ", "SBI Elite CC PMT" ,"Unknown", "Cash Withdrawal (cashwdl)" ,"Rent", "BESCOM", "Principal Mutual Fund"]
done = 0
for m in month_li:
    done = 0
    d = month_wise_transactions[month_wise_transactions["month"]==m]
    d = d.reset_index()
    d.sort_values(by=['amount'], inplace=True, ascending=False)
    d = d.reset_index()
    for i in range(d.shape[0]):
        if done==0:
            bnd = d['brand'][i]
            amt = d['amount'][i]
            da = d['valueDate'][i]
            if bnd not in avoid:
                mo_li.append(m)
                br_li.append(bnd)
                amt_li.append(amt)
                da_li.append(da)
                done = 1

In [None]:
da_li

In [None]:
topBrandMonth = pd.DataFrame({"month": mo_li, "brand":br_li, "amount":amt_li, "date":da_li})

In [None]:
topBrandMonth.to_json("../web-app/server/src/topBrandMonth.json", orient = 'records')

In [None]:
fav_brands = ["iPlanet", "Adidas", "Gilly's", "H&M"]

In [None]:
favBrands = pd.DataFrame({"brand":fav_brands})

In [None]:
favBrands.to_json("../web-app/server/src/favBrands.json", orient = 'records')

In [None]:
#url db for logos
from os import listdir
from os.path import isfile, join
onlyfiles = [f for f in listdir("../web-app/Client/src/assets/Logos/") if isfile(join("../web-app/Client/src/assets/Logos/", f))]
(onlyfiles)

In [None]:
onlyfiles.remove('iPlanet.png')
onlyfiles.sort()
onlyfiles.append('iPlanet.png')

In [None]:
to_append = "./Logos/"
for i in range(len(onlyfiles)):
    onlyfiles[i] = to_append + onlyfiles[i]
onlyfiles

In [None]:
brandWise.sort_values(by=['brand'], inplace=True, ascending=True)
brandWise_url = brandWise

In [None]:
brandWise_url

In [None]:
brandWise_url["url"] = onlyfiles

In [None]:
brandWise_url = brandWise_url.reset_index()

In [None]:
brandWise_url

In [None]:
brandWise_url["url"][33] = "plated.jpg"

In [None]:
brandWise_url.drop(["amount","index"],inplace=True,axis=1)

In [None]:
brandWise_url.to_json("../web-app/server/src/newBrandWiseUrl.json", orient = 'records')