# In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Load the transaction records from the CSV file.
data = pd.read_csv('credit_card_transactions.csv')

# Column dtypes and non-null counts. DataFrame.info() writes its report to
# stdout itself and returns None, so it is called directly; wrapping it in
# print() would emit a stray "None" line after the report.
data.info()

# Summary statistics as produced by DataFrame.describe().
print(data.describe())

# Number of missing values in each column.
print(data.isnull().sum())

# Distribution of the target variable: one bar per distinct `fraud` value.
# The column is named through the `x` keyword. Passing the Series positionally
# binds it to `data` in recent seaborn releases (x/y are keyword-only there),
# which does not produce the intended per-class counts.
sns.countplot(x='fraud', data=data)
plt.title('Fraud vs Legitimate Transactions')
plt.show()

# Histogram of the transaction amounts with a kernel density estimate
# overlaid on the bars.
amount_axes = sns.histplot(data['transaction_amount'], kde=True)
amount_axes.set_title('Distribution of Transaction Amounts')
plt.show()

# Distribution of transaction time.
# Parse the timestamp column in place and derive the hour component as a new
# `transaction_hour` column; both columns stay on `data` for later steps.
data['transaction_date'] = pd.to_datetime(data['transaction_date'])
data['transaction_hour'] = data['transaction_date'].dt.hour

# One bar per hour value. The column is named through the `x` keyword because
# a positionally passed Series is bound to `data` in recent seaborn releases
# (x/y are keyword-only there) instead of being counted per category.
sns.countplot(x='transaction_hour', data=data)
plt.title('Transaction Frequency by Hour')
plt.show()

# Mean of the `fraud` column for each merchant, ordered from the highest
# mean to the lowest, drawn as one bar per merchant.
fraud_by_merchant = (
    data.groupby('merchant')['fraud']
    .mean()
    .sort_values(ascending=False)
)
merchant_axes = sns.barplot(
    x=fraud_by_merchant.index,
    y=fraud_by_merchant.values,
)
merchant_axes.set_title('Fraud Rate by Merchant')
# Turn the x tick labels vertical.
plt.xticks(rotation=90)
plt.show()

# Correlation heatmap.
# numeric_only=True limits the matrix to numeric columns. Since pandas 2.0
# the default is numeric_only=False, and corr() then raises on columns that
# cannot be converted to float (e.g. the datetime `transaction_date`).
corr = data.corr(numeric_only=True)

# Annotate every cell with its coefficient, using a diverging colour map.
sns.heatmap(corr, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()
