# Xente Fraud Detection

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
pd.set_option('max_colwidth', 1000)
pd.set_option('display.max_columns', 40)
description = pd.read_csv('Xente_Variable_Definitions.csv', encoding='utf-8')
description

Unnamed: 0,Column Name,Definition
0,TransactionId,Unique �transaction identifier on platform
1,BatchId,Unique number assigned to a batch of transactions for processing
2,AccountId,Unique number identifying the customer on platform
3,SubscriptionId,Unique number identifying the customer subscription
4,CustomerId,Unique identifier attached to Account
5,CurrencyCode,Country currency
6,CountryCode,Numerical geographical code of country
7,ProviderId,Source provider of Item �bought.
8,ProductId,Item name being bought.
9,ProductCategory,ProductIds are organized into these broader product categories.


In [3]:
df = pd.read_csv('training.csv')
df

Unnamed: 0,TransactionId,BatchId,AccountId,SubscriptionId,CustomerId,CurrencyCode,CountryCode,ProviderId,ProductId,ProductCategory,ChannelId,Amount,Value,TransactionStartTime,PricingStrategy,FraudResult
0,TransactionId_76871,BatchId_36123,AccountId_3957,SubscriptionId_887,CustomerId_4406,UGX,256,ProviderId_6,ProductId_10,airtime,ChannelId_3,1000.0,1000,2018-11-15T02:18:49Z,2,0
1,TransactionId_73770,BatchId_15642,AccountId_4841,SubscriptionId_3829,CustomerId_4406,UGX,256,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-20.0,20,2018-11-15T02:19:08Z,2,0
2,TransactionId_26203,BatchId_53941,AccountId_4229,SubscriptionId_222,CustomerId_4683,UGX,256,ProviderId_6,ProductId_1,airtime,ChannelId_3,500.0,500,2018-11-15T02:44:21Z,2,0
3,TransactionId_380,BatchId_102363,AccountId_648,SubscriptionId_2185,CustomerId_988,UGX,256,ProviderId_1,ProductId_21,utility_bill,ChannelId_3,20000.0,21800,2018-11-15T03:32:55Z,2,0
4,TransactionId_28195,BatchId_38780,AccountId_4841,SubscriptionId_3829,CustomerId_988,UGX,256,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-644.0,644,2018-11-15T03:34:21Z,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95657,TransactionId_89881,BatchId_96668,AccountId_4841,SubscriptionId_3829,CustomerId_3078,UGX,256,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-1000.0,1000,2019-02-13T09:54:09Z,2,0
95658,TransactionId_91597,BatchId_3503,AccountId_3439,SubscriptionId_2643,CustomerId_3874,UGX,256,ProviderId_6,ProductId_10,airtime,ChannelId_3,1000.0,1000,2019-02-13T09:54:25Z,2,0
95659,TransactionId_82501,BatchId_118602,AccountId_4841,SubscriptionId_3829,CustomerId_3874,UGX,256,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-20.0,20,2019-02-13T09:54:35Z,2,0
95660,TransactionId_136354,BatchId_70924,AccountId_1346,SubscriptionId_652,CustomerId_1709,UGX,256,ProviderId_6,ProductId_19,tv,ChannelId_3,3000.0,3000,2019-02-13T10:01:10Z,2,0


## Data Cleaning & Preprocessing

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95662 entries, 0 to 95661
Data columns (total 16 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   TransactionId         95662 non-null  object 
 1   BatchId               95662 non-null  object 
 2   AccountId             95662 non-null  object 
 3   SubscriptionId        95662 non-null  object 
 4   CustomerId            95662 non-null  object 
 5   CurrencyCode          95662 non-null  object 
 6   CountryCode           95662 non-null  int64  
 7   ProviderId            95662 non-null  object 
 8   ProductId             95662 non-null  object 
 9   ProductCategory       95662 non-null  object 
 10  ChannelId             95662 non-null  object 
 11  Amount                95662 non-null  float64
 12  Value                 95662 non-null  int64  
 13  TransactionStartTime  95662 non-null  object 
 14  PricingStrategy       95662 non-null  int64  
 15  FraudResult        

There is no missing data: every column has 95,662 non-null values.

In [5]:
def uniqueInCol(dataframe):
    """Tabulate how many distinct values each column of ``dataframe`` holds.

    Returns a one-column DataFrame named ``nunique`` whose index is the
    column labels of ``dataframe`` (in their original order).
    """
    # Series.nunique() ignores missing values by default.
    distinct_counts = [dataframe[label].nunique() for label in dataframe.columns]
    return pd.DataFrame(distinct_counts, index=dataframe.columns, columns=['nunique'])

uniqueInCol(df)

Unnamed: 0,nunique
TransactionId,95662
BatchId,94809
AccountId,3633
SubscriptionId,3627
CustomerId,3742
CurrencyCode,1
CountryCode,1
ProviderId,6
ProductId,23
ProductCategory,9


In [6]:
df.drop(['CurrencyCode', 'CountryCode'], axis=1, inplace=True)

In [7]:
# The raw values are ISO-8601 UTC strings such as 2018-11-15T02:18:49Z.
# Parse them as UTC, then drop the timezone so the column stays a naive
# datetime64 (same wall-clock values as before). Unlike slicing the string by
# hand, this cell can be re-run safely after the column is already converted.
df['TransactionStartTime'] = (
    pd.to_datetime(df['TransactionStartTime'], utc=True).dt.tz_localize(None)
)

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95662 entries, 0 to 95661
Data columns (total 14 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   TransactionId         95662 non-null  object        
 1   BatchId               95662 non-null  object        
 2   AccountId             95662 non-null  object        
 3   SubscriptionId        95662 non-null  object        
 4   CustomerId            95662 non-null  object        
 5   ProviderId            95662 non-null  object        
 6   ProductId             95662 non-null  object        
 7   ProductCategory       95662 non-null  object        
 8   ChannelId             95662 non-null  object        
 9   Amount                95662 non-null  float64       
 10  Value                 95662 non-null  int64         
 11  TransactionStartTime  95662 non-null  datetime64[ns]
 12  PricingStrategy       95662 non-null  int64         
 13  FraudResult     

In [9]:
df

Unnamed: 0,TransactionId,BatchId,AccountId,SubscriptionId,CustomerId,ProviderId,ProductId,ProductCategory,ChannelId,Amount,Value,TransactionStartTime,PricingStrategy,FraudResult
0,TransactionId_76871,BatchId_36123,AccountId_3957,SubscriptionId_887,CustomerId_4406,ProviderId_6,ProductId_10,airtime,ChannelId_3,1000.0,1000,2018-11-15 02:18:49,2,0
1,TransactionId_73770,BatchId_15642,AccountId_4841,SubscriptionId_3829,CustomerId_4406,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-20.0,20,2018-11-15 02:19:08,2,0
2,TransactionId_26203,BatchId_53941,AccountId_4229,SubscriptionId_222,CustomerId_4683,ProviderId_6,ProductId_1,airtime,ChannelId_3,500.0,500,2018-11-15 02:44:21,2,0
3,TransactionId_380,BatchId_102363,AccountId_648,SubscriptionId_2185,CustomerId_988,ProviderId_1,ProductId_21,utility_bill,ChannelId_3,20000.0,21800,2018-11-15 03:32:55,2,0
4,TransactionId_28195,BatchId_38780,AccountId_4841,SubscriptionId_3829,CustomerId_988,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-644.0,644,2018-11-15 03:34:21,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95657,TransactionId_89881,BatchId_96668,AccountId_4841,SubscriptionId_3829,CustomerId_3078,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-1000.0,1000,2019-02-13 09:54:09,2,0
95658,TransactionId_91597,BatchId_3503,AccountId_3439,SubscriptionId_2643,CustomerId_3874,ProviderId_6,ProductId_10,airtime,ChannelId_3,1000.0,1000,2019-02-13 09:54:25,2,0
95659,TransactionId_82501,BatchId_118602,AccountId_4841,SubscriptionId_3829,CustomerId_3874,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-20.0,20,2019-02-13 09:54:35,2,0
95660,TransactionId_136354,BatchId_70924,AccountId_1346,SubscriptionId_652,CustomerId_1709,ProviderId_6,ProductId_19,tv,ChannelId_3,3000.0,3000,2019-02-13 10:01:10,2,0


## Exploratory Data Analysis

In [10]:
# Extremely imbalanced data
df.FraudResult.value_counts()

0    95469
1      193
Name: FraudResult, dtype: int64

In [11]:
# Let's inspect some information from AccountId
df[df.AccountId == 'AccountId_4841']

Unnamed: 0,TransactionId,BatchId,AccountId,SubscriptionId,CustomerId,ProviderId,ProductId,ProductCategory,ChannelId,Amount,Value,TransactionStartTime,PricingStrategy,FraudResult
1,TransactionId_73770,BatchId_15642,AccountId_4841,SubscriptionId_3829,CustomerId_4406,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-20.0,20,2018-11-15 02:19:08,2,0
4,TransactionId_28195,BatchId_38780,AccountId_4841,SubscriptionId_3829,CustomerId_988,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-644.0,644,2018-11-15 03:34:21,2,0
7,TransactionId_100640,BatchId_38561,AccountId_4841,SubscriptionId_3829,CustomerId_2858,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-500.0,500,2018-11-15 03:45:13,2,0
11,TransactionId_33857,BatchId_126394,AccountId_4841,SubscriptionId_3829,CustomerId_3052,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-40.0,40,2018-11-15 04:32:42,2,0
18,TransactionId_64044,BatchId_117733,AccountId_4841,SubscriptionId_3829,CustomerId_3105,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-10.0,10,2018-11-15 04:54:18,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95653,TransactionId_68851,BatchId_100900,AccountId_4841,SubscriptionId_3829,CustomerId_539,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-140.0,140,2019-02-13 09:50:34,2,0
95655,TransactionId_38907,BatchId_92168,AccountId_4841,SubscriptionId_3829,CustomerId_960,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-50.0,50,2019-02-13 09:52:49,2,0
95657,TransactionId_89881,BatchId_96668,AccountId_4841,SubscriptionId_3829,CustomerId_3078,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-1000.0,1000,2019-02-13 09:54:09,2,0
95659,TransactionId_82501,BatchId_118602,AccountId_4841,SubscriptionId_3829,CustomerId_3874,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-20.0,20,2019-02-13 09:54:35,2,0


In [12]:
# Some AccountIds are attached to several CustomerIds
df[df.AccountId == 'AccountId_4841'].CustomerId.nunique()

2577

In [13]:
df[(df.AccountId == 'AccountId_4841') & (df.FraudResult == 1) ]

Unnamed: 0,TransactionId,BatchId,AccountId,SubscriptionId,CustomerId,ProviderId,ProductId,ProductCategory,ChannelId,Amount,Value,TransactionStartTime,PricingStrategy,FraudResult
68918,TransactionId_71725,BatchId_62457,AccountId_4841,SubscriptionId_3829,CustomerId_865,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-1005.0,1005,2019-01-23 09:15:37,2,1


In [14]:
# Let's look at the fraudulent transactions
df[df.FraudResult == 1].sort_values(by=['Value'])

Unnamed: 0,TransactionId,BatchId,AccountId,SubscriptionId,CustomerId,ProviderId,ProductId,ProductCategory,ChannelId,Amount,Value,TransactionStartTime,PricingStrategy,FraudResult
68888,TransactionId_11592,BatchId_62469,AccountId_530,SubscriptionId_135,CustomerId_865,ProviderId_6,ProductId_3,airtime,ChannelId_3,500.0,500,2019-01-23 08:55:16,2,1
68918,TransactionId_71725,BatchId_62457,AccountId_4841,SubscriptionId_3829,CustomerId_865,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-1005.0,1005,2019-01-23 09:15:37,2,1
71689,TransactionId_52301,BatchId_76054,AccountId_1609,SubscriptionId_2872,CustomerId_1988,ProviderId_3,ProductId_15,financial_services,ChannelId_3,30000.0,30000,2019-01-25 10:24:10,2,1
70492,TransactionId_78946,BatchId_13641,AccountId_1609,SubscriptionId_2872,CustomerId_1988,ProviderId_3,ProductId_15,financial_services,ChannelId_3,50000.0,50000,2019-01-24 21:02:07,2,1
69570,TransactionId_63916,BatchId_36626,AccountId_1609,SubscriptionId_2872,CustomerId_1988,ProviderId_3,ProductId_15,financial_services,ChannelId_3,50000.0,50000,2019-01-23 20:49:20,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93003,TransactionId_137519,BatchId_61860,AccountId_1178,SubscriptionId_2288,CustomerId_1535,ProviderId_1,ProductId_15,financial_services,ChannelId_3,9850000.0,9850000,2019-02-11 03:26:13,2,1
87466,TransactionId_55014,BatchId_130638,AccountId_1178,SubscriptionId_2288,CustomerId_1535,ProviderId_1,ProductId_15,financial_services,ChannelId_3,9856000.0,9856000,2019-02-06 21:58:05,2,1
92152,TransactionId_15293,BatchId_68761,AccountId_1178,SubscriptionId_2288,CustomerId_1535,ProviderId_1,ProductId_15,financial_services,ChannelId_3,9860888.0,9860888,2019-02-10 05:11:25,2,1
87465,TransactionId_27985,BatchId_15818,AccountId_1178,SubscriptionId_2288,CustomerId_1535,ProviderId_1,ProductId_15,financial_services,ChannelId_3,9870000.0,9870000,2019-02-06 21:49:57,2,1


In [15]:
df[(df.FraudResult == 1) & (df.Amount < 0)]

Unnamed: 0,TransactionId,BatchId,AccountId,SubscriptionId,CustomerId,ProviderId,ProductId,ProductCategory,ChannelId,Amount,Value,TransactionStartTime,PricingStrategy,FraudResult
55822,TransactionId_56954,BatchId_76447,AccountId_4249,SubscriptionId_4429,CustomerId_7339,ProviderId_4,ProductId_3,airtime,ChannelId_2,-900000.0,900000,2019-01-10 15:48:59,4,1
55841,TransactionId_73797,BatchId_127642,AccountId_4249,SubscriptionId_4429,CustomerId_7339,ProviderId_4,ProductId_10,airtime,ChannelId_2,-500000.0,500000,2019-01-10 16:03:35,4,1
56035,TransactionId_87682,BatchId_101919,AccountId_4249,SubscriptionId_4429,CustomerId_7401,ProviderId_4,ProductId_3,airtime,ChannelId_2,-500000.0,500000,2019-01-10 21:27:12,4,1
60834,TransactionId_126320,BatchId_93368,AccountId_4249,SubscriptionId_4429,CustomerId_7429,ProviderId_4,ProductId_3,airtime,ChannelId_2,-500000.0,500000,2019-01-15 11:09:57,0,1
68918,TransactionId_71725,BatchId_62457,AccountId_4841,SubscriptionId_3829,CustomerId_865,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-1005.0,1005,2019-01-23 09:15:37,2,1


In [16]:
pd.options.display.float_format = '{:.5f}'.format

In [17]:
# Add a Debit flag (1 when Amount is not negative, 0 otherwise): in the
# exploration above, almost all fraudulent transactions are debit payments.
# np.where evaluates the whole column at once instead of calling a lambda per row.
df['Debit'] = np.where(df.Amount < 0, 0, 1)

In [18]:
# Time difference between a transaction and the previous transaction of the same AccountId
def timeDifference(dataframe):
    """Return the gap between consecutive transactions of every account.

    Accounts are emitted in ascending ``AccountId`` order. Within an account
    the rows keep the order they have in ``dataframe``, so pass a frame that is
    already sorted by ``['AccountId', 'TransactionStartTime']`` when the result
    is assigned back to that frame positionally.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain an ``AccountId`` column and a datetime
        ``TransactionStartTime`` column.

    Returns
    -------
    tuple of (list, list)
        ``(daysDiff, secondsDiff)``: the ``days`` and ``seconds`` components of
        each gap. ``seconds`` is the remainder within the day, not the total
        number of seconds. The first transaction of an account gets 0 in both.
    """
    # Rows without an AccountId belong to no account and contribute nothing.
    # mergesort is stable, so rows of one account keep their incoming order.
    ordered = dataframe.dropna(subset=['AccountId']).sort_values(by='AccountId', kind='mergesort')
    if ordered.empty:
        return [], []

    per_account = ordered.groupby('AccountId', sort=False)
    # One vectorised pass replaces re-filtering the whole frame for every row.
    gaps = per_account['TransactionStartTime'].diff()
    # Only the first row of each account is forced to zero; any other missing
    # gap (e.g. from a missing timestamp) is left untouched.
    is_first = (per_account.cumcount() == 0).to_numpy()
    gaps = gaps.mask(is_first, pd.Timedelta(0))
    return gaps.dt.days.tolist(), gaps.dt.seconds.tolist()

In [19]:
df_account_sorted = df.sort_values(by=['AccountId', 'TransactionStartTime'])
# timeDifferenceList = timeDifference(df.sort_values(by=['AccountId', 'TransactionStartTime']))

In [20]:
# pd.DataFrame({'DaysDiff':timeDifferenceList[0], 'SecondsDiff':timeDifferenceList[1]}).to_csv('time_differences.csv', index=False)

In [21]:
# Use a fresh 0..n-1 index so the per-row differences line up positionally.
df_account_sorted = df_account_sorted.reset_index(drop=True)

try:
    # The differences are expensive to compute, so reuse the cached copy if present.
    time_differences = pd.read_csv('time_differences.csv')
except FileNotFoundError:
    # Fresh checkout: rebuild the cache so the notebook survives Restart & Run All
    # instead of depending on a file that only a commented-out cell can create.
    days_diff, seconds_diff = timeDifference(df_account_sorted)
    time_differences = pd.DataFrame({'DaysDiff': days_diff, 'SecondsDiff': seconds_diff})
    time_differences.to_csv('time_differences.csv', index=False)

# A stale cache would attach gaps to the wrong transactions; fail with a clear message.
if len(time_differences) != len(df_account_sorted):
    raise ValueError(
        'time_differences.csv has %d rows but the data has %d; delete the file to rebuild it'
        % (len(time_differences), len(df_account_sorted))
    )

df_account_sorted['DaysDiff'] = time_differences.DaysDiff.values
df_account_sorted['SecondsDiff'] = time_differences.SecondsDiff.values
df_account_sorted.to_csv('df_update_1', index=False)

In [22]:
df_account_sorted

Unnamed: 0,TransactionId,BatchId,AccountId,SubscriptionId,CustomerId,ProviderId,ProductId,ProductCategory,ChannelId,Amount,Value,TransactionStartTime,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff
0,TransactionId_532,BatchId_1117,AccountId_1,SubscriptionId_3960,CustomerId_46,ProviderId_3,ProductId_15,financial_services,ChannelId_3,30000.00000,30000,2019-01-09 15:39:57,2,0,1,0,0
1,TransactionId_58617,BatchId_19521,AccountId_1,SubscriptionId_3960,CustomerId_46,ProviderId_3,ProductId_15,financial_services,ChannelId_3,20000.00000,20000,2019-01-09 19:24:53,2,0,1,0,13496
2,TransactionId_98020,BatchId_19521,AccountId_1,SubscriptionId_3960,CustomerId_46,ProviderId_3,ProductId_15,financial_services,ChannelId_3,20000.00000,20000,2019-01-09 19:25:12,2,0,1,0,19
3,TransactionId_20966,BatchId_26095,AccountId_10,SubscriptionId_4346,CustomerId_1093,ProviderId_4,ProductId_1,airtime,ChannelId_5,-2000.00000,2000,2018-11-15 18:00:39,4,0,0,0,0
4,TransactionId_106422,BatchId_38522,AccountId_10,SubscriptionId_4346,CustomerId_4552,ProviderId_4,ProductId_3,airtime,ChannelId_5,-10000.00000,10000,2018-11-15 18:32:11,4,0,0,0,1892
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95657,TransactionId_99694,BatchId_96892,AccountId_998,SubscriptionId_247,CustomerId_1349,ProviderId_3,ProductId_3,airtime,ChannelId_3,1000.00000,1000,2018-12-22 06:03:37,1,0,1,0,121
95658,TransactionId_105745,BatchId_96892,AccountId_998,SubscriptionId_247,CustomerId_1349,ProviderId_3,ProductId_3,airtime,ChannelId_3,1000.00000,1000,2018-12-22 06:04:18,1,0,1,0,41
95659,TransactionId_6566,BatchId_28301,AccountId_998,SubscriptionId_247,CustomerId_1349,ProviderId_3,ProductId_3,airtime,ChannelId_3,1000.00000,1000,2018-12-22 06:06:14,1,0,1,0,116
95660,TransactionId_58894,BatchId_99925,AccountId_998,SubscriptionId_247,CustomerId_1349,ProviderId_3,ProductId_15,financial_services,ChannelId_3,2000.00000,2000,2018-12-22 06:07:46,2,0,1,0,92


In [23]:
# Let's split the TransactionStartTime column into separate elements such as year, month, etc.
# The .dt accessor extracts each component for the whole column at once,
# instead of calling a Python lambda on every row.
start_time = df_account_sorted.TransactionStartTime.dt
df_account_sorted['Year'] = start_time.year
df_account_sorted['Month'] = start_time.month
df_account_sorted['Day'] = start_time.day
df_account_sorted['Hour'] = start_time.hour
df_account_sorted['Minute'] = start_time.minute
df_account_sorted['Second'] = start_time.second

df_account_sorted

Unnamed: 0,TransactionId,BatchId,AccountId,SubscriptionId,CustomerId,ProviderId,ProductId,ProductCategory,ChannelId,Amount,Value,TransactionStartTime,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
0,TransactionId_532,BatchId_1117,AccountId_1,SubscriptionId_3960,CustomerId_46,ProviderId_3,ProductId_15,financial_services,ChannelId_3,30000.00000,30000,2019-01-09 15:39:57,2,0,1,0,0,2019,1,9,15,39,57
1,TransactionId_58617,BatchId_19521,AccountId_1,SubscriptionId_3960,CustomerId_46,ProviderId_3,ProductId_15,financial_services,ChannelId_3,20000.00000,20000,2019-01-09 19:24:53,2,0,1,0,13496,2019,1,9,19,24,53
2,TransactionId_98020,BatchId_19521,AccountId_1,SubscriptionId_3960,CustomerId_46,ProviderId_3,ProductId_15,financial_services,ChannelId_3,20000.00000,20000,2019-01-09 19:25:12,2,0,1,0,19,2019,1,9,19,25,12
3,TransactionId_20966,BatchId_26095,AccountId_10,SubscriptionId_4346,CustomerId_1093,ProviderId_4,ProductId_1,airtime,ChannelId_5,-2000.00000,2000,2018-11-15 18:00:39,4,0,0,0,0,2018,11,15,18,0,39
4,TransactionId_106422,BatchId_38522,AccountId_10,SubscriptionId_4346,CustomerId_4552,ProviderId_4,ProductId_3,airtime,ChannelId_5,-10000.00000,10000,2018-11-15 18:32:11,4,0,0,0,1892,2018,11,15,18,32,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95657,TransactionId_99694,BatchId_96892,AccountId_998,SubscriptionId_247,CustomerId_1349,ProviderId_3,ProductId_3,airtime,ChannelId_3,1000.00000,1000,2018-12-22 06:03:37,1,0,1,0,121,2018,12,22,6,3,37
95658,TransactionId_105745,BatchId_96892,AccountId_998,SubscriptionId_247,CustomerId_1349,ProviderId_3,ProductId_3,airtime,ChannelId_3,1000.00000,1000,2018-12-22 06:04:18,1,0,1,0,41,2018,12,22,6,4,18
95659,TransactionId_6566,BatchId_28301,AccountId_998,SubscriptionId_247,CustomerId_1349,ProviderId_3,ProductId_3,airtime,ChannelId_3,1000.00000,1000,2018-12-22 06:06:14,1,0,1,0,116,2018,12,22,6,6,14
95660,TransactionId_58894,BatchId_99925,AccountId_998,SubscriptionId_247,CustomerId_1349,ProviderId_3,ProductId_15,financial_services,ChannelId_3,2000.00000,2000,2018-12-22 06:07:46,2,0,1,0,92,2018,12,22,6,7,46


In [24]:
df_account_sorted.groupby('FraudResult').mean()

Unnamed: 0_level_0,Amount,Value,PricingStrategy,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
FraudResult,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,3627.72266,6763.2267,2.25709,0.60004,0.6148,10268.22193,2018.49689,6.56832,15.90638,12.44591,29.4863,29.54902
1,1535272.3886,1561819.53886,1.70466,0.97409,0.76166,10742.6114,2018.60622,5.53368,14.18135,13.34197,28.13472,29.67358


In [25]:
df_account_sorted[df_account_sorted.FraudResult == 0].describe()

Unnamed: 0,Amount,Value,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
count,95469.0,95469.0,95469.0,95469.0,95469.0,95469.0,95469.0,95469.0,95469.0,95469.0,95469.0,95469.0,95469.0
mean,3627.72266,6763.2267,2.25709,0.0,0.60004,0.6148,10268.22193,2018.49689,6.56832,15.90638,12.44591,29.4863,29.54902
std,40357.10782,39994.83538,0.73197,0.0,0.48989,3.10648,21291.02604,0.49999,5.22438,8.96189,4.84636,17.29072,17.31224
min,-1000000.0,2.0,0.0,0.0,0.0,0.0,0.0,2018.0,1.0,1.0,0.0,0.0,0.0
25%,-50.0,250.0,2.0,0.0,0.0,0.0,65.0,2018.0,1.0,8.0,8.0,15.0,15.0
50%,1000.0,1000.0,2.0,0.0,1.0,0.0,240.0,2018.0,11.0,16.0,13.0,29.0,29.0
75%,2500.0,5000.0,2.0,0.0,1.0,0.0,5506.0,2019.0,12.0,24.0,17.0,45.0,45.0
max,2400000.0,2400000.0,4.0,0.0,1.0,79.0,86393.0,2019.0,12.0,31.0,23.0,59.0,59.0


In [26]:
df_account_sorted[df_account_sorted.FraudResult == 1].describe()

Unnamed: 0,Amount,Value,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
count,193.0,193.0,193.0,193.0,193.0,193.0,193.0,193.0,193.0,193.0,193.0,193.0,193.0
mean,1535272.3886,1561819.53886,1.70466,1.0,0.97409,0.76166,10742.6114,2018.60622,5.53368,14.18135,13.34197,28.13472,29.67358
std,2100525.20747,2081227.29783,0.9633,0.0,0.15927,4.19141,21456.59794,0.48986,5.09699,9.27661,5.07313,16.75661,17.15621
min,-900000.0,500.0,0.0,1.0,0.0,0.0,0.0,2018.0,1.0,1.0,0.0,0.0,0.0
25%,500000.0,500000.0,2.0,1.0,1.0,0.0,54.0,2018.0,1.0,6.0,10.0,14.0,15.0
50%,600000.0,650000.0,2.0,1.0,1.0,0.0,256.0,2019.0,2.0,12.0,13.0,27.0,29.0
75%,2000000.0,2000000.0,2.0,1.0,1.0,0.0,5180.0,2019.0,12.0,22.0,17.0,43.0,45.0
max,9880000.0,9880000.0,4.0,1.0,1.0,43.0,82437.0,2019.0,12.0,31.0,22.0,59.0,59.0


In [27]:
df_account_sorted[df_account_sorted.FraudResult == 1]

Unnamed: 0,TransactionId,BatchId,AccountId,SubscriptionId,CustomerId,ProviderId,ProductId,ProductCategory,ChannelId,Amount,Value,TransactionStartTime,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
2318,TransactionId_27985,BatchId_15818,AccountId_1178,SubscriptionId_2288,CustomerId_1535,ProviderId_1,ProductId_15,financial_services,ChannelId_3,9870000.00000,9870000,2019-02-06 21:49:57,2,1,1,0,0,2019,2,6,21,49,57
2319,TransactionId_55014,BatchId_130638,AccountId_1178,SubscriptionId_2288,CustomerId_1535,ProviderId_1,ProductId_15,financial_services,ChannelId_3,9856000.00000,9856000,2019-02-06 21:58:05,2,1,1,0,488,2019,2,6,21,58,5
2321,TransactionId_31461,BatchId_56005,AccountId_1178,SubscriptionId_2288,CustomerId_1535,ProviderId_1,ProductId_15,financial_services,ChannelId_3,9880000.00000,9880000,2019-02-07 03:45:05,2,1,1,0,20578,2019,2,7,3,45,5
2322,TransactionId_15293,BatchId_68761,AccountId_1178,SubscriptionId_2288,CustomerId_1535,ProviderId_1,ProductId_15,financial_services,ChannelId_3,9860888.00000,9860888,2019-02-10 05:11:25,2,1,1,3,5180,2019,2,10,5,11,25
2323,TransactionId_137519,BatchId_61860,AccountId_1178,SubscriptionId_2288,CustomerId_1535,ProviderId_1,ProductId_15,financial_services,ChannelId_3,9850000.00000,9850000,2019-02-11 03:26:13,2,1,1,0,80088,2019,2,11,3,26,13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93802,TransactionId_119486,BatchId_77312,AccountId_830,SubscriptionId_1262,CustomerId_1175,ProviderId_3,ProductId_15,financial_services,ChannelId_3,5000000.00000,5000000,2019-01-25 18:21:38,0,1,1,0,3728,2019,1,25,18,21,38
93887,TransactionId_61258,BatchId_127071,AccountId_830,SubscriptionId_1262,CustomerId_1175,ProviderId_1,ProductId_15,financial_services,ChannelId_3,2500000.00000,2500000,2019-02-08 09:23:08,2,1,1,0,309,2019,2,8,9,23,8
95354,TransactionId_25630,BatchId_1480,AccountId_953,SubscriptionId_162,CustomerId_1302,ProviderId_1,ProductId_15,financial_services,ChannelId_3,5000000.00000,5000000,2019-01-10 12:08:22,2,1,1,0,0,2019,1,10,12,8,22
95356,TransactionId_101662,BatchId_86823,AccountId_953,SubscriptionId_162,CustomerId_1302,ProviderId_1,ProductId_5,transport,ChannelId_3,520000.00000,520000,2019-01-10 12:38:57,2,1,1,0,1007,2019,1,10,12,38,57


In [28]:
df_account_sorted[df_account_sorted.AccountId == 'AccountId_1178']

Unnamed: 0,TransactionId,BatchId,AccountId,SubscriptionId,CustomerId,ProviderId,ProductId,ProductCategory,ChannelId,Amount,Value,TransactionStartTime,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
2318,TransactionId_27985,BatchId_15818,AccountId_1178,SubscriptionId_2288,CustomerId_1535,ProviderId_1,ProductId_15,financial_services,ChannelId_3,9870000.0,9870000,2019-02-06 21:49:57,2,1,1,0,0,2019,2,6,21,49,57
2319,TransactionId_55014,BatchId_130638,AccountId_1178,SubscriptionId_2288,CustomerId_1535,ProviderId_1,ProductId_15,financial_services,ChannelId_3,9856000.0,9856000,2019-02-06 21:58:05,2,1,1,0,488,2019,2,6,21,58,5
2320,TransactionId_31746,BatchId_99391,AccountId_1178,SubscriptionId_2288,CustomerId_1535,ProviderId_1,ProductId_15,financial_services,ChannelId_3,1095860.0,1095860,2019-02-06 22:02:07,2,0,1,0,242,2019,2,6,22,2,7
2321,TransactionId_31461,BatchId_56005,AccountId_1178,SubscriptionId_2288,CustomerId_1535,ProviderId_1,ProductId_15,financial_services,ChannelId_3,9880000.0,9880000,2019-02-07 03:45:05,2,1,1,0,20578,2019,2,7,3,45,5
2322,TransactionId_15293,BatchId_68761,AccountId_1178,SubscriptionId_2288,CustomerId_1535,ProviderId_1,ProductId_15,financial_services,ChannelId_3,9860888.0,9860888,2019-02-10 05:11:25,2,1,1,3,5180,2019,2,10,5,11,25
2323,TransactionId_137519,BatchId_61860,AccountId_1178,SubscriptionId_2288,CustomerId_1535,ProviderId_1,ProductId_15,financial_services,ChannelId_3,9850000.0,9850000,2019-02-11 03:26:13,2,1,1,0,80088,2019,2,11,3,26,13
2324,TransactionId_96894,BatchId_48649,AccountId_1178,SubscriptionId_2288,CustomerId_1535,ProviderId_1,ProductId_15,financial_services,ChannelId_3,9800000.0,9800000,2019-02-11 03:31:21,2,1,1,0,308,2019,2,11,3,31,21


In [29]:
df_account_sorted[df_account_sorted.AccountId == 'AccountId_953']

Unnamed: 0,TransactionId,BatchId,AccountId,SubscriptionId,CustomerId,ProviderId,ProductId,ProductCategory,ChannelId,Amount,Value,TransactionStartTime,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
95354,TransactionId_25630,BatchId_1480,AccountId_953,SubscriptionId_162,CustomerId_1302,ProviderId_1,ProductId_15,financial_services,ChannelId_3,5000000.0,5000000,2019-01-10 12:08:22,2,1,1,0,0,2019,1,10,12,8,22
95355,TransactionId_61306,BatchId_114066,AccountId_953,SubscriptionId_162,CustomerId_1302,ProviderId_1,ProductId_15,financial_services,ChannelId_3,1000.0,1000,2019-01-10 12:22:10,2,0,1,0,828,2019,1,10,12,22,10
95356,TransactionId_101662,BatchId_86823,AccountId_953,SubscriptionId_162,CustomerId_1302,ProviderId_1,ProductId_5,transport,ChannelId_3,520000.0,520000,2019-01-10 12:38:57,2,1,1,0,1007,2019,1,10,12,38,57
95357,TransactionId_91613,BatchId_127377,AccountId_953,SubscriptionId_162,CustomerId_1302,ProviderId_1,ProductId_15,financial_services,ChannelId_3,10000.0,10000,2019-01-11 08:08:52,2,0,1,0,70195,2019,1,11,8,8,52
95358,TransactionId_104105,BatchId_34534,AccountId_953,SubscriptionId_162,CustomerId_1302,ProviderId_6,ProductId_3,airtime,ChannelId_3,500.0,500,2019-01-13 08:07:43,2,0,1,1,86331,2019,1,13,8,7,43
95359,TransactionId_88970,BatchId_40132,AccountId_953,SubscriptionId_162,CustomerId_1302,ProviderId_1,ProductId_3,airtime,ChannelId_3,500.0,500,2019-01-13 08:09:17,4,0,1,0,94,2019,1,13,8,9,17
95360,TransactionId_507,BatchId_55624,AccountId_953,SubscriptionId_162,CustomerId_1302,ProviderId_6,ProductId_10,airtime,ChannelId_3,500.0,500,2019-01-14 08:46:30,2,0,1,1,2233,2019,1,14,8,46,30
95361,TransactionId_85815,BatchId_118851,AccountId_953,SubscriptionId_162,CustomerId_1302,ProviderId_3,ProductId_15,financial_services,ChannelId_3,2000000.0,2000000,2019-01-18 15:09:59,2,1,1,4,23009,2019,1,18,15,9,59
95362,TransactionId_67733,BatchId_46297,AccountId_953,SubscriptionId_162,CustomerId_1302,ProviderId_3,ProductId_15,financial_services,ChannelId_3,100000.0,100000,2019-01-18 15:12:57,2,0,1,0,178,2019,1,18,15,12,57
95363,TransactionId_62632,BatchId_115639,AccountId_953,SubscriptionId_162,CustomerId_1302,ProviderId_3,ProductId_15,financial_services,ChannelId_3,50000.0,50000,2019-01-18 15:15:29,2,0,1,0,152,2019,1,18,15,15,29


In [30]:
df_account_sorted[df_account_sorted.AccountId == 'AccountId_953'].groupby('ProductCategory').median()

Unnamed: 0_level_0,Amount,Value,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
ProductCategory,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
airtime,500.0,500.0,2.0,0.0,1.0,0.5,1217.0,2019.0,1.0,13.5,8.0,24.5,23.5
financial_services,20000.0,20000.0,2.0,0.0,1.0,0.0,828.0,2019.0,1.0,18.0,13.0,15.0,29.0
transport,520000.0,520000.0,2.0,1.0,1.0,0.0,1007.0,2019.0,1.0,10.0,12.0,38.0,57.0


In [31]:
df_account_sorted.groupby('ProductCategory').median()

Unnamed: 0_level_0,Amount,Value,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
ProductCategory,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
airtime,1000.0,1500.0,2.0,0.0,1.0,0.0,1031.0,2019.0,2.0,15.0,13.0,29.0,29.0
data_bundles,1000.0,1000.0,2.0,0.0,1.0,0.0,8721.0,2018.0,11.0,18.0,12.0,29.0,29.0
financial_services,-50.0,220.0,2.0,0.0,0.0,0.0,119.0,2018.0,11.0,17.0,13.0,30.0,30.0
movies,5000.0,7000.0,2.0,0.0,1.0,0.0,405.0,2018.0,12.0,19.0,12.0,26.0,29.0
other,500.0,500.0,1.0,0.0,1.0,0.0,6.0,2018.0,12.0,13.0,8.0,17.0,41.0
ticket,80000.0,80000.0,3.0,0.0,1.0,0.0,195.0,2018.0,12.0,14.0,11.0,32.0,27.0
transport,75000.0,75000.0,2.0,0.0,1.0,0.0,699.0,2019.0,2.0,17.0,11.0,30.0,34.0
tv,11000.0,11000.0,2.0,0.0,1.0,0.0,2207.0,2018.0,11.0,15.0,14.0,28.0,29.0
utility_bill,10000.0,11200.0,2.0,0.0,1.0,0.0,1234.5,2019.0,2.0,17.0,13.0,31.0,29.0


In [32]:
# Amount difference between a transaction and the previous transaction of the same AccountId
def amountDifference(dataframe):
    """Return the change in ``Amount`` between consecutive transactions of every account.

    Note that the reference value is the previous transaction of the same
    account, not a ProductCategory median.

    Accounts are emitted in ascending ``AccountId`` order. Within an account
    the rows keep the order they have in ``dataframe``, so pass a frame that is
    already sorted by ``['AccountId', 'TransactionStartTime']`` when the result
    is assigned back to that frame positionally.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain ``AccountId`` and ``Amount`` columns.

    Returns
    -------
    list
        One entry per row: ``Amount`` minus the previous ``Amount`` of the same
        account, with 0 for the first transaction of each account.
    """
    # Rows without an AccountId belong to no account and contribute nothing.
    # mergesort is stable, so rows of one account keep their incoming order.
    ordered = dataframe.dropna(subset=['AccountId']).sort_values(by='AccountId', kind='mergesort')
    if ordered.empty:
        return []

    per_account = ordered.groupby('AccountId', sort=False)
    # One vectorised pass replaces re-filtering the whole frame for every row.
    changes = per_account['Amount'].diff()
    # Only the first row of each account is forced to zero; a missing Amount
    # elsewhere still yields a missing difference.
    is_first = (per_account.cumcount() == 0).to_numpy()
    return changes.mask(is_first, 0).tolist()

In [33]:
# Consecutive amount differences for a single account
amountDifference(df_account_sorted.query("AccountId == 'AccountId_830'"))

[0,
 3500.0,
 -12500.0,
 1000.0,
 -1000.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 7000.0,
 -3000.0,
 -3000.0,
 1500.0,
 200.0,
 -1700.0,
 -1000.0,
 1000.0,
 22000.0,
 -22000.0,
 -1000.0,
 0.0,
 9000.0,
 -9000.0,
 0.0,
 0.0,
 0.0,
 500.0,
 -500.0,
 0.0,
 0.0,
 0.0,
 -500.0,
 34500.0,
 0.0,
 0.0,
 0.0,
 -1000.0,
 -23550.0,
 9550.0,
 -15000.0,
 4995000.0,
 -4999000.0,
 1000.0,
 0.0,
 0.0,
 43000.0,
 0.0,
 -25000.0,
 40000.0,
 0.0,
 -50500.0,
 -2100.0,
 0.0,
 7600.0,
 -13000.0,
 -1000.0,
 -500.0,
 0.0,
 500.0,
 -500.0,
 1500.0,
 22500.0,
 -23500.0,
 89000.0,
 -86500.0,
 196500.0,
 -199000.0,
 1000.0,
 -1000.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1000.0,
 0.0,
 -500.0,
 -500.0,
 4000.0,
 44500.0,
 -48380.0,
 1000.0,
 568.0,
 -1188.0,
 -500.0,
 1000.0,
 -500.0,
 -500.0,
 0.0,
 1000.0,
 -1000.0,
 0.0,
 1000.0,
 -1500.0,
 500.0,
 1000.0,
 -1000.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 6000.0,
 -6000.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 -500.0,
 500.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1000

In [None]:
# Let's inspect how the transaction statistics differ between fraudulent and legitimate transactions within each ProductCategory

#### airtime

In [35]:
# Summary statistics of all airtime transactions
df_account_sorted.query("ProductCategory == 'airtime'").describe()

Unnamed: 0,Amount,Value,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
count,45027.0,45027.0,45027.0,45027.0,45027.0,45027.0,45027.0,45027.0,45027.0,45027.0,45027.0,45027.0,45027.0
mean,822.95643,6049.79961,2.54279,0.0004,0.88147,0.68648,14620.18806,2018.51291,6.40458,15.21934,12.47571,29.40236,29.48242
std,23097.89148,22306.70247,0.95283,0.01999,0.32324,3.0179,24005.9442,0.49984,5.24023,9.00809,4.81055,17.25971,17.33451
min,-1000000.0,33.0,0.0,0.0,0.0,0.0,0.0,2018.0,1.0,1.0,0.0,0.0,0.0
25%,1000.0,1000.0,2.0,0.0,1.0,0.0,108.0,2018.0,1.0,8.0,9.0,14.0,15.0
50%,1000.0,1500.0,2.0,0.0,1.0,0.0,1031.0,2019.0,2.0,15.0,13.0,29.0,29.0
75%,3000.0,5000.0,4.0,0.0,1.0,0.0,18147.5,2019.0,12.0,23.0,16.0,44.0,45.0
max,2000000.0,2000000.0,4.0,1.0,1.0,79.0,86393.0,2019.0,12.0,31.0,23.0,59.0,59.0


In [36]:
# Airtime transactions flagged as fraud
df_account_sorted.query("ProductCategory == 'airtime' and FraudResult == 1")

Unnamed: 0,TransactionId,BatchId,AccountId,SubscriptionId,CustomerId,ProviderId,ProductId,ProductCategory,ChannelId,Amount,Value,TransactionStartTime,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
10336,TransactionId_128664,BatchId_128748,AccountId_1909,SubscriptionId_2496,CustomerId_2303,ProviderId_6,ProductId_3,airtime,ChannelId_3,500000.0,500000,2019-02-12 18:07:11,2,1,1,0,19246,2019,2,12,18,7,11
33296,TransactionId_107009,BatchId_125211,AccountId_3337,SubscriptionId_1252,CustomerId_3768,ProviderId_3,ProductId_10,airtime,ChannelId_3,200000.0,200000,2019-01-26 10:19:29,1,1,1,29,76666,2019,1,26,10,19,29
33297,TransactionId_45737,BatchId_125211,AccountId_3337,SubscriptionId_1252,CustomerId_3768,ProviderId_3,ProductId_10,airtime,ChannelId_3,200000.0,200000,2019-01-26 10:20:34,1,1,1,0,65,2019,1,26,10,20,34
42555,TransactionId_64893,BatchId_126705,AccountId_4003,SubscriptionId_3002,CustomerId_4453,ProviderId_1,ProductId_10,airtime,ChannelId_3,500000.0,500000,2018-11-27 13:43:04,0,1,1,0,70,2018,11,27,13,43,4
42568,TransactionId_12019,BatchId_71337,AccountId_4003,SubscriptionId_3002,CustomerId_4453,ProviderId_1,ProductId_10,airtime,ChannelId_3,500000.0,500000,2018-12-04 09:14:20,4,1,1,0,163,2018,12,4,9,14,20
49379,TransactionId_56954,BatchId_76447,AccountId_4249,SubscriptionId_4429,CustomerId_7339,ProviderId_4,ProductId_3,airtime,ChannelId_2,-900000.0,900000,2019-01-10 15:48:59,4,1,0,0,5138,2019,1,10,15,48,59
49381,TransactionId_73797,BatchId_127642,AccountId_4249,SubscriptionId_4429,CustomerId_7339,ProviderId_4,ProductId_10,airtime,ChannelId_2,-500000.0,500000,2019-01-10 16:03:35,4,1,0,0,623,2019,1,10,16,3,35
49383,TransactionId_87682,BatchId_101919,AccountId_4249,SubscriptionId_4429,CustomerId_7401,ProviderId_4,ProductId_3,airtime,ChannelId_2,-500000.0,500000,2019-01-10 21:27:12,4,1,0,0,19306,2019,1,10,21,27,12
49401,TransactionId_126320,BatchId_93368,AccountId_4249,SubscriptionId_4429,CustomerId_7429,ProviderId_4,ProductId_3,airtime,ChannelId_2,-500000.0,500000,2019-01-15 11:09:57,0,1,0,0,844,2019,1,15,11,9,57
52554,TransactionId_99174,BatchId_22742,AccountId_4421,SubscriptionId_4038,CustomerId_4878,ProviderId_6,ProductId_10,airtime,ChannelId_3,600000.0,600000,2018-12-23 16:32:01,2,1,1,0,184,2018,12,23,16,32,1


In [38]:
# Summary statistics of the non-fraudulent airtime transactions
df_account_sorted.query("ProductCategory == 'airtime' and FraudResult == 0").describe()

Unnamed: 0,Amount,Value,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
count,45009.0,45009.0,45009.0,45009.0,45009.0,45009.0,45009.0,45009.0,45009.0,45009.0,45009.0,45009.0,45009.0
mean,745.51221,5867.80037,2.54265,0.0,0.88151,0.68597,14617.4406,2018.51285,6.4053,15.21867,12.47584,29.40403,29.48297
std,20877.38283,20049.66505,0.95253,0.0,0.32319,3.01546,24004.16473,0.49984,5.24021,9.00841,4.81075,17.25999,17.33432
min,-1000000.0,33.0,0.0,0.0,0.0,0.0,0.0,2018.0,1.0,1.0,0.0,0.0,0.0
25%,1000.0,1000.0,2.0,0.0,1.0,0.0,107.0,2018.0,1.0,8.0,9.0,14.0,15.0
50%,1000.0,1500.0,2.0,0.0,1.0,0.0,1031.0,2019.0,2.0,15.0,13.0,29.0,29.0
75%,3000.0,5000.0,4.0,0.0,1.0,0.0,18127.0,2019.0,12.0,23.0,16.0,44.0,45.0
max,2000000.0,2000000.0,4.0,0.0,1.0,79.0,86393.0,2019.0,12.0,31.0,23.0,59.0,59.0


#### data_bundles

In [39]:
# Summary statistics of all data_bundles transactions
df_account_sorted.query("ProductCategory == 'data_bundles'").describe()

Unnamed: 0,Amount,Value,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
count,1613.0,1613.0,1613.0,1613.0,1613.0,1613.0,1613.0,1613.0,1613.0,1613.0,1613.0,1613.0,1613.0
mean,3402.16987,3714.01116,2.43521,0.0,0.9504,0.76751,21936.95226,2018.47985,6.61376,17.54991,12.51829,29.30378,28.76689
std,12128.18062,12036.28969,0.89819,0.0,0.21718,2.74832,26222.97758,0.49975,5.34379,8.26669,5.11898,17.39244,17.17488
min,-20000.0,500.0,0.0,0.0,0.0,0.0,0.0,2018.0,1.0,1.0,0.0,0.0,0.0
25%,500.0,1000.0,2.0,0.0,1.0,0.0,587.0,2018.0,1.0,12.0,8.0,14.0,14.0
50%,1000.0,1000.0,2.0,0.0,1.0,0.0,8721.0,2018.0,11.0,18.0,12.0,29.0,29.0
75%,2000.0,2000.0,2.0,0.0,1.0,0.0,39211.0,2019.0,12.0,24.0,17.0,44.0,43.0
max,284900.0,284900.0,4.0,0.0,1.0,38.0,86223.0,2019.0,12.0,31.0,23.0,59.0,59.0


In [42]:
# The result is empty: there are no fraudulent transactions in the data_bundles product category
df_account_sorted.query("ProductCategory == 'data_bundles' and FraudResult == 1")

Unnamed: 0,TransactionId,BatchId,AccountId,SubscriptionId,CustomerId,ProviderId,ProductId,ProductCategory,ChannelId,Amount,Value,TransactionStartTime,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second


#### financial_services

In [43]:
# Summary statistics of all financial_services transactions
df_account_sorted.query("ProductCategory == 'financial_services'").describe()

Unnamed: 0,Amount,Value,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
count,45405.0,45405.0,45405.0,45405.0,45405.0,45405.0,45405.0,45405.0,45405.0,45405.0,45405.0,45405.0,45405.0
mean,11435.55946,12734.46757,1.98864,0.00355,0.28138,0.50409,5197.17148,2018.48497,6.69576,16.53853,12.40238,29.54813,29.66217
std,176493.98019,176405.01611,0.1496,0.05944,0.44968,3.07296,16250.26612,0.49978,5.20328,8.88853,4.88082,17.30694,17.2754
min,-25000.0,2.0,0.0,0.0,0.0,0.0,0.0,2018.0,1.0,1.0,0.0,0.0,0.0
25%,-125.0,50.0,2.0,0.0,0.0,0.0,38.0,2018.0,1.0,9.0,8.0,15.0,15.0
50%,-50.0,220.0,2.0,0.0,0.0,0.0,119.0,2018.0,11.0,17.0,13.0,30.0,30.0
75%,1000.0,5000.0,2.0,0.0,1.0,0.0,421.0,2019.0,12.0,24.0,17.0,45.0,45.0
max,9880000.0,9880000.0,2.0,1.0,1.0,79.0,86393.0,2019.0,12.0,31.0,23.0,59.0,59.0


In [44]:
# financial_services transactions flagged as fraud
df_account_sorted.query("ProductCategory == 'financial_services' and FraudResult == 1")

Unnamed: 0,TransactionId,BatchId,AccountId,SubscriptionId,CustomerId,ProviderId,ProductId,ProductCategory,ChannelId,Amount,Value,TransactionStartTime,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
2318,TransactionId_27985,BatchId_15818,AccountId_1178,SubscriptionId_2288,CustomerId_1535,ProviderId_1,ProductId_15,financial_services,ChannelId_3,9870000.00000,9870000,2019-02-06 21:49:57,2,1,1,0,0,2019,2,6,21,49,57
2319,TransactionId_55014,BatchId_130638,AccountId_1178,SubscriptionId_2288,CustomerId_1535,ProviderId_1,ProductId_15,financial_services,ChannelId_3,9856000.00000,9856000,2019-02-06 21:58:05,2,1,1,0,488,2019,2,6,21,58,5
2321,TransactionId_31461,BatchId_56005,AccountId_1178,SubscriptionId_2288,CustomerId_1535,ProviderId_1,ProductId_15,financial_services,ChannelId_3,9880000.00000,9880000,2019-02-07 03:45:05,2,1,1,0,20578,2019,2,7,3,45,5
2322,TransactionId_15293,BatchId_68761,AccountId_1178,SubscriptionId_2288,CustomerId_1535,ProviderId_1,ProductId_15,financial_services,ChannelId_3,9860888.00000,9860888,2019-02-10 05:11:25,2,1,1,3,5180,2019,2,10,5,11,25
2323,TransactionId_137519,BatchId_61860,AccountId_1178,SubscriptionId_2288,CustomerId_1535,ProviderId_1,ProductId_15,financial_services,ChannelId_3,9850000.00000,9850000,2019-02-11 03:26:13,2,1,1,0,80088,2019,2,11,3,26,13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93175,TransactionId_32870,BatchId_84247,AccountId_777,SubscriptionId_539,CustomerId_1122,ProviderId_1,ProductId_15,financial_services,ChannelId_3,1850000.00000,1850000,2018-12-11 05:54:29,2,1,1,0,0,2018,12,11,5,54,29
93802,TransactionId_119486,BatchId_77312,AccountId_830,SubscriptionId_1262,CustomerId_1175,ProviderId_3,ProductId_15,financial_services,ChannelId_3,5000000.00000,5000000,2019-01-25 18:21:38,0,1,1,0,3728,2019,1,25,18,21,38
93887,TransactionId_61258,BatchId_127071,AccountId_830,SubscriptionId_1262,CustomerId_1175,ProviderId_1,ProductId_15,financial_services,ChannelId_3,2500000.00000,2500000,2019-02-08 09:23:08,2,1,1,0,309,2019,2,8,9,23,8
95354,TransactionId_25630,BatchId_1480,AccountId_953,SubscriptionId_162,CustomerId_1302,ProviderId_1,ProductId_15,financial_services,ChannelId_3,5000000.00000,5000000,2019-01-10 12:08:22,2,1,1,0,0,2019,1,10,12,8,22


In [47]:
# Summary statistics of the non-fraudulent financial_services transactions
df_account_sorted.query("ProductCategory == 'financial_services' and FraudResult == 0").describe()

Unnamed: 0,Amount,Value,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
count,45244.0,45244.0,45244.0,45244.0,45244.0,45244.0,45244.0,45244.0,45244.0,45244.0,45244.0,45244.0,45244.0
mean,5201.2209,6504.70672,1.9901,0.0,0.27884,0.50391,5185.11555,2018.48457,6.6995,16.54741,12.39848,29.55059,29.6636
std,53238.18406,53094.6833,0.13959,0.0,0.44844,3.07093,16236.77747,0.49977,5.20327,8.88602,4.87925,17.3098,17.27685
min,-25000.0,2.0,0.0,0.0,0.0,0.0,0.0,2018.0,1.0,1.0,0.0,0.0,0.0
25%,-125.0,50.0,2.0,0.0,0.0,0.0,38.0,2018.0,1.0,9.0,8.0,15.0,15.0
50%,-50.0,220.0,2.0,0.0,0.0,0.0,119.0,2018.0,11.0,17.0,13.0,30.0,30.0
75%,1000.0,5000.0,2.0,0.0,1.0,0.0,420.0,2019.0,12.0,25.0,17.0,45.0,45.0
max,2400000.0,2400000.0,2.0,0.0,1.0,79.0,86393.0,2019.0,12.0,31.0,23.0,59.0,59.0


#### movies

In [48]:
# Summary statistics of all movies transactions
df_account_sorted.query("ProductCategory == 'movies'").describe()

Unnamed: 0,Amount,Value,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
count,175.0,175.0,175.0,175.0,175.0,175.0,175.0,175.0,175.0,175.0,175.0,175.0,175.0
mean,7734.28571,10988.0,1.94857,0.0,0.98857,1.75429,16272.33714,2018.09714,10.65714,18.49143,12.01143,27.66857,28.70857
std,5562.28798,7176.1918,0.2215,0.0,0.1066,5.40882,26456.43158,0.297,3.07888,7.63602,3.86851,17.8771,18.18426
min,-10000.0,700.0,1.0,0.0,0.0,0.0,0.0,2018.0,1.0,1.0,1.0,0.0,0.0
25%,5000.0,7000.0,2.0,0.0,1.0,0.0,77.0,2018.0,11.0,15.0,10.0,13.0,12.5
50%,5000.0,7000.0,2.0,0.0,1.0,0.0,405.0,2018.0,12.0,19.0,12.0,26.0,29.0
75%,10000.0,14000.0,2.0,0.0,1.0,0.0,19009.5,2018.0,12.0,24.5,15.0,43.0,43.0
max,40000.0,49000.0,2.0,0.0,1.0,35.0,84557.0,2019.0,12.0,30.0,21.0,59.0,59.0


In [49]:
# The result is empty: there are no fraudulent transactions in the movies product category
df_account_sorted.query("ProductCategory == 'movies' and FraudResult == 1")

Unnamed: 0,TransactionId,BatchId,AccountId,SubscriptionId,CustomerId,ProviderId,ProductId,ProductCategory,ChannelId,Amount,Value,TransactionStartTime,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second


#### tv

In [51]:
# Summary statistics of all tv transactions
df_account_sorted.query("ProductCategory == 'tv'").describe()

Unnamed: 0,Amount,Value,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
count,1279.0,1279.0,1279.0,1279.0,1279.0,1279.0,1279.0,1279.0,1279.0,1279.0,1279.0,1279.0,1279.0
mean,16641.12588,18156.43862,1.93901,0.0,0.97107,1.05317,15729.06177,2018.49648,6.53636,14.92729,12.76701,29.11415,29.50508
std,42052.95786,42517.70557,0.26425,0.0,0.16767,5.16976,23998.98842,0.50018,5.1608,8.94839,4.6558,17.06884,17.8358
min,-115000.0,900.0,0.0,0.0,0.0,0.0,0.0,2018.0,1.0,1.0,0.0,0.0,0.0
25%,3500.0,3500.0,2.0,0.0,1.0,0.0,259.5,2018.0,1.0,7.0,8.0,15.0,14.0
50%,11000.0,11000.0,2.0,0.0,1.0,0.0,2207.0,2018.0,11.0,15.0,14.0,28.0,29.0
75%,12000.0,12000.0,2.0,0.0,1.0,0.0,22819.0,2019.0,12.0,23.0,17.0,44.0,45.0
max,280000.0,288400.0,2.0,0.0,1.0,79.0,86319.0,2019.0,12.0,31.0,21.0,59.0,59.0


In [53]:
# The result is empty: there are no fraudulent transactions in the tv product category
df_account_sorted.query("ProductCategory == 'tv' and FraudResult == 1")

Unnamed: 0,TransactionId,BatchId,AccountId,SubscriptionId,CustomerId,ProviderId,ProductId,ProductCategory,ChannelId,Amount,Value,TransactionStartTime,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second


#### transport

In [55]:
# Summary statistics of all transport transactions
df_account_sorted.query("ProductCategory == 'transport'").describe()

Unnamed: 0,Amount,Value,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
count,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0
mean,170240.0,170240.0,1.76,0.08,1.0,3.64,18602.88,2018.52,6.16,16.36,11.52,29.12,32.76
std,332964.87302,332964.87302,0.59722,0.27689,0.0,7.84156,28234.22818,0.5099,5.33604,8.11829,5.07543,15.648,16.37905
min,26000.0,26000.0,0.0,0.0,1.0,0.0,0.0,2018.0,1.0,1.0,3.0,3.0,3.0
25%,41000.0,41000.0,2.0,0.0,1.0,0.0,240.0,2018.0,1.0,11.0,7.0,18.0,20.0
50%,75000.0,75000.0,2.0,0.0,1.0,0.0,699.0,2019.0,2.0,17.0,11.0,30.0,34.0
75%,100000.0,100000.0,2.0,0.0,1.0,3.0,31413.0,2019.0,12.0,21.0,14.0,39.0,46.0
max,1660000.0,1660000.0,2.0,1.0,1.0,30.0,84601.0,2019.0,12.0,30.0,22.0,59.0,57.0


In [54]:
# Transport transactions flagged as fraud
df_account_sorted.query("ProductCategory == 'transport' and FraudResult == 1")

Unnamed: 0,TransactionId,BatchId,AccountId,SubscriptionId,CustomerId,ProviderId,ProductId,ProductCategory,ChannelId,Amount,Value,TransactionStartTime,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
3424,TransactionId_118792,BatchId_4007,AccountId_1291,SubscriptionId_141,CustomerId_1653,ProviderId_1,ProductId_5,transport,ChannelId_3,1660000.0,1660000,2019-01-20 09:31:24,2,1,1,22,61931,2019,1,20,9,31,24
95356,TransactionId_101662,BatchId_86823,AccountId_953,SubscriptionId_162,CustomerId_1302,ProviderId_1,ProductId_5,transport,ChannelId_3,520000.0,520000,2019-01-10 12:38:57,2,1,1,0,1007,2019,1,10,12,38,57


In [56]:
# Transport transactions not flagged as fraud
df_account_sorted.query("ProductCategory == 'transport' and FraudResult == 0")

Unnamed: 0,TransactionId,BatchId,AccountId,SubscriptionId,CustomerId,ProviderId,ProductId,ProductCategory,ChannelId,Amount,Value,TransactionStartTime,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
1538,TransactionId_13653,BatchId_67537,AccountId_1094,SubscriptionId_1321,CustomerId_1449,ProviderId_1,ProductId_5,transport,ChannelId_3,87000.0,87000,2019-01-21 07:03:49,2,0,1,30,31413,2019,1,21,7,3,49
1539,TransactionId_48541,BatchId_53688,AccountId_1094,SubscriptionId_1321,CustomerId_1449,ProviderId_3,ProductId_5,transport,ChannelId_3,75000.0,75000,2019-01-29 10:14:46,1,0,1,8,11457,2019,1,29,10,14,46
3534,TransactionId_42532,BatchId_103107,AccountId_1297,SubscriptionId_1001,CustomerId_1659,ProviderId_1,ProductId_5,transport,ChannelId_3,161000.0,161000,2018-12-05 22:36:07,0,0,1,0,240,2018,12,5,22,36,7
4812,TransactionId_135553,BatchId_34971,AccountId_1384,SubscriptionId_3405,CustomerId_1748,ProviderId_1,ProductId_5,transport,ChannelId_3,62000.0,62000,2019-01-23 20:30:45,2,0,1,0,981,2019,1,23,20,30,45
9889,TransactionId_115597,BatchId_86323,AccountId_1874,SubscriptionId_2840,CustomerId_2267,ProviderId_6,ProductId_5,transport,ChannelId_3,90000.0,90000,2019-01-01 06:44:03,2,0,1,17,13798,2019,1,1,6,44,3
10068,TransactionId_85358,BatchId_5058,AccountId_1898,SubscriptionId_3303,CustomerId_2292,ProviderId_1,ProductId_5,transport,ChannelId_3,180000.0,180000,2018-12-30 21:18:38,2,0,1,0,525,2018,12,30,21,18,38
13347,TransactionId_30804,BatchId_81864,AccountId_2150,SubscriptionId_2536,CustomerId_2555,ProviderId_1,ProductId_5,transport,ChannelId_3,41000.0,41000,2018-11-20 17:50:45,2,0,1,0,0,2018,11,20,17,50,45
13474,TransactionId_121216,BatchId_80370,AccountId_2170,SubscriptionId_4078,CustomerId_2577,ProviderId_5,ProductId_5,transport,ChannelId_3,31000.0,31000,2018-11-22 03:08:47,2,0,1,0,40868,2018,11,22,3,8,47
13488,TransactionId_18370,BatchId_92601,AccountId_2177,SubscriptionId_3168,CustomerId_2584,ProviderId_1,ProductId_5,transport,ChannelId_3,142000.0,142000,2018-12-13 06:47:15,0,0,1,0,0,2018,12,13,6,47,15
14269,TransactionId_36558,BatchId_126034,AccountId_2225,SubscriptionId_4742,CustomerId_2636,ProviderId_5,ProductId_5,transport,ChannelId_3,82000.0,82000,2019-01-21 11:30:46,2,0,1,0,0,2019,1,21,11,30,46


In [57]:
# Distinct product categories, in order of first appearance
df_account_sorted['ProductCategory'].unique()

array(['financial_services', 'airtime', 'data_bundles', 'utility_bill',
       'movies', 'tv', 'transport', 'ticket', 'other'], dtype=object)

#### utility_bill

In [58]:
# Summary statistics of all utility_bill transactions
df_account_sorted.query("ProductCategory == 'utility_bill'").describe()

Unnamed: 0,Amount,Value,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
count,1920.0,1920.0,1920.0,1920.0,1920.0,1920.0,1920.0,1920.0,1920.0,1920.0,1920.0,1920.0,1920.0
mean,17232.85885,20946.69219,1.90365,0.00625,0.94583,1.04792,14364.35938,2018.51562,6.35521,16.07135,12.73958,30.25104,29.40938
std,48719.7336,50305.50028,0.30385,0.07883,0.22641,4.00401,23203.81567,0.49989,5.18846,9.40395,4.98051,17.53765,17.35754
min,-150000.0,1115.0,0.0,0.0,0.0,0.0,0.0,2018.0,1.0,1.0,0.0,0.0,0.0
25%,5000.0,5750.0,2.0,0.0,1.0,0.0,200.75,2018.0,1.0,7.0,8.0,15.0,14.0
50%,10000.0,11200.0,2.0,0.0,1.0,0.0,1234.5,2019.0,2.0,17.0,13.0,31.0,29.0
75%,15000.0,17680.0,2.0,0.0,1.0,0.0,18246.0,2019.0,12.0,25.0,17.0,46.0,44.0
max,700000.0,733000.0,2.0,1.0,1.0,58.0,86245.0,2019.0,12.0,31.0,23.0,59.0,59.0


In [59]:
# utility_bill transactions flagged as fraud
df_account_sorted.query("ProductCategory == 'utility_bill' and FraudResult == 1")

Unnamed: 0,TransactionId,BatchId,AccountId,SubscriptionId,CustomerId,ProviderId,ProductId,ProductCategory,ChannelId,Amount,Value,TransactionStartTime,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
9217,TransactionId_48757,BatchId_93509,AccountId_1823,SubscriptionId_4174,CustomerId_2214,ProviderId_3,ProductId_22,utility_bill,ChannelId_3,700000.0,733000,2018-12-03 11:24:40,1,1,1,1,41522,2018,12,3,11,24,40
20518,TransactionId_85354,BatchId_44669,AccountId_2656,SubscriptionId_2912,CustomerId_3075,ProviderId_5,ProductId_13,utility_bill,ChannelId_3,570000.0,599100,2019-02-04 11:57:53,2,1,1,0,75687,2019,2,4,11,57,53
20519,TransactionId_27200,BatchId_105926,AccountId_2656,SubscriptionId_2912,CustomerId_3075,ProviderId_5,ProductId_13,utility_bill,ChannelId_3,570035.0,599137,2019-02-04 18:25:49,2,1,1,0,23276,2019,2,4,18,25,49
20520,TransactionId_54576,BatchId_131348,AccountId_2656,SubscriptionId_2912,CustomerId_3075,ProviderId_5,ProductId_13,utility_bill,ChannelId_3,570035.0,599137,2019-02-04 19:03:11,2,1,1,0,2242,2019,2,4,19,3,11
20521,TransactionId_114761,BatchId_82467,AccountId_2656,SubscriptionId_2912,CustomerId_3075,ProviderId_5,ProductId_13,utility_bill,ChannelId_3,570035.0,599137,2019-02-04 19:08:58,2,1,1,0,347,2019,2,4,19,8,58
20522,TransactionId_129150,BatchId_5957,AccountId_2656,SubscriptionId_2912,CustomerId_3075,ProviderId_5,ProductId_13,utility_bill,ChannelId_3,570000.0,599100,2019-02-04 20:03:58,2,1,1,0,3300,2019,2,4,20,3,58
20523,TransactionId_62440,BatchId_113961,AccountId_2656,SubscriptionId_2912,CustomerId_3075,ProviderId_5,ProductId_13,utility_bill,ChannelId_3,570035.0,599137,2019-02-05 05:29:17,2,1,1,0,33919,2019,2,5,5,29,17
20525,TransactionId_113136,BatchId_42862,AccountId_2656,SubscriptionId_2912,CustomerId_3075,ProviderId_5,ProductId_22,utility_bill,ChannelId_3,599392.0,629374,2019-02-06 09:10:06,2,1,1,0,60290,2019,2,6,9,10,6
33292,TransactionId_2703,BatchId_51734,AccountId_3337,SubscriptionId_1252,CustomerId_3768,ProviderId_3,ProductId_21,utility_bill,ChannelId_3,500000.0,521000,2018-12-27 12:48:47,1,1,1,0,0,2018,12,27,12,48,47
33293,TransactionId_63074,BatchId_1552,AccountId_3337,SubscriptionId_1252,CustomerId_3768,ProviderId_3,ProductId_21,utility_bill,ChannelId_3,500000.0,521000,2018-12-27 12:52:24,1,1,1,0,217,2018,12,27,12,52,24


In [61]:
# Summary statistics of the non-fraudulent utility_bill transactions
df_account_sorted.query("ProductCategory == 'utility_bill' and FraudResult == 0").describe()

Unnamed: 0,Amount,Value,PricingStrategy,FraudResult,Debit,DaysDiff,SecondsDiff,Year,Month,Day,Hour,Minute,Second
count,1908.0,1908.0,1908.0,1908.0,1908.0,1908.0,1908.0,1908.0,1908.0,1908.0,1908.0,1908.0,1908.0
mean,13819.47432,17388.11688,1.90566,0.0,0.94549,1.05398,14328.20283,2018.5152,6.35639,16.09801,12.7348,30.30451,29.3695
std,22459.32359,22307.70133,0.30121,0.0,0.22708,4.01579,23182.56623,0.4999,5.19003,9.38884,4.98355,17.5079,17.34726
min,-150000.0,1115.0,0.0,0.0,0.0,0.0,0.0,2018.0,1.0,1.0,0.0,0.0,0.0
25%,5000.0,5750.0,2.0,0.0,1.0,0.0,200.0,2018.0,1.0,7.0,8.0,15.0,14.0
50%,10000.0,11200.0,2.0,0.0,1.0,0.0,1231.0,2019.0,2.0,17.0,13.0,31.0,29.0
75%,15000.0,16650.0,2.0,0.0,1.0,0.0,18184.75,2019.0,12.0,25.0,17.0,46.0,44.0
max,300000.0,315000.0,2.0,0.0,1.0,58.0,86245.0,2019.0,12.0,31.0,23.0,59.0,59.0
