Import Libraries

In [44]:
import pandas as pd
import numpy as np
from scipy.stats import skew
import matplotlib.pyplot as plt


Import Data

In [56]:
dataframe = pd.read_csv(r'financial_anomaly_data.csv')
dataframe.head()

Unnamed: 0,Timestamp,TransactionID,AccountID,Amount,Merchant,TransactionType,Location
0,01-01-2023 08:00,TXN1127,ACC4,95071.92,MerchantH,Purchase,Tokyo
1,01-01-2023 08:01,TXN1639,ACC10,15607.89,MerchantH,Purchase,London
2,01-01-2023 08:02,TXN872,ACC8,65092.34,MerchantE,Withdrawal,London
3,01-01-2023 08:03,TXN1438,ACC6,87.87,MerchantE,Purchase,London
4,01-01-2023 08:04,TXN1338,ACC6,716.56,MerchantI,Purchase,Los Angeles


Identify and Remove Nulls

In [57]:
# .22% of the rows are null. These values will be removed so analysis can be done. 
null_rows = dataframe['Amount'].isnull().sum()
total_rows = dataframe['Amount'].count()


print('Number of rows with null values:',null_rows)
print('Number of total rows:',total_rows)
print('Percent of total rows that are null:',round(null_rows/total_rows * 100,2),'%')

dataframe = dataframe.dropna(subset=['Amount'])
dataframe['Amount'].isnull().sum()


Number of rows with null values: 481
Number of total rows: 216960
Percent of total rows that are null: 0.22 %


0

In [58]:
# There is a slight right skew, which aligns with the summary data.
print(skew(dataframe['Amount']))  # Close to 0 → symmetric


0.40402518724097153


Obtain summary of the Amount Column

In [59]:
print(dataframe['Amount'].describe())


count    216960.000000
mean      50090.025108
std       29097.905016
min          10.510000
25%       25061.242500
50%       50183.980000
75%       75080.460000
max      978942.260000
Name: Amount, dtype: float64


In [61]:
# Calculate Z-scores for the 'Amount' column
dataframe['z_score'] = stats.zscore(dataframe['Amount'])

# Set Z-score threshold for outliers (commonly 3 or -3)
threshold = 3

# Filter outliers
outliers = dataframe[dataframe['z_score'].abs() > threshold]
outliers = outliers.sort_values(by='Amount')

print(outliers.round(2))

               Timestamp TransactionID AccountID     Amount   Merchant  \
45099   01-02-2023 15:39       TXN1359     ACC11  151020.55  MerchantB   
4148    04-01-2023 05:08        TXN625      ACC1  187344.37  MerchantD   
211866  28-05-2023 11:06        TXN543      ACC2  191561.95  MerchantJ   
211753  28-05-2023 09:13        TXN141      ACC4  223542.64  MerchantG   
4194    04-01-2023 05:54        TXN667      ACC3  272990.11  MerchantE   
3453    03-01-2023 17:33       TXN1690      ACC8  489492.30  MerchantF   
211959  28-05-2023 12:39       TXN1359      ACC8  616155.70  MerchantC   
213460  29-05-2023 13:40       TXN1249      ACC5  689504.90  MerchantI   
3448    03-01-2023 17:28        TXN827     ACC12  712076.97  MerchantA   
45100   01-02-2023 15:40        TXN190      ACC5  873004.79  MerchantI   
2775    03-01-2023 06:15       TXN1049     ACC14  978942.26  MerchantJ   

       TransactionType       Location  z_score  
45099         Transfer          Tokyo     3.47  
4148         

Refer to the file named "Report" to view a detailed analysis of this notebook. 