In [5]:
import plotly.graph_objs as go
import plotly.express as px
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
# Seed BOTH generators: the customer-score cells draw from the stdlib `random`
# module, which np.random.seed() does not affect.
np.random.seed(123)
random.seed(123)

## Synthetic Data Creation

### 1. Customer Scores

In [6]:
customer_ids = list(range(1, 1501))

# Risk tiers ordered by ascending probability of default (PD). Each tier covers
# PDs up to and including `max_pd`; payment_probs is
# (on time, 30 days late, 60 days late, 90 days late).
risk_tiers = [
    {'max_pd': 0.4, 'risk_category': 'low',
     'score_range': (75, 100), 'limit_range': (10000000, 50000000),
     'payment_probs': (0.6, 0.2, 0.1, 0.1)},
    {'max_pd': 0.7, 'risk_category': 'medium',
     'score_range': (45, 75), 'limit_range': (5000000, 10000000),
     'payment_probs': (0.4, 0.3, 0.2, 0.1)},
    {'max_pd': 1.0, 'risk_category': 'high',
     'score_range': (0, 45), 'limit_range': (500000, 1000000),
     'payment_probs': (0.2, 0.2, 0.3, 0.3)},
]


def classify_risk(probability_of_default):
    """Return the tier dict whose PD band contains `probability_of_default`.

    Bands are [0, 0.4] -> low, (0.4, 0.7] -> medium, (0.7, 1] -> high.
    A PD of exactly 0.0 is 'low' (it previously fell through to 'high');
    anything above the last bound is treated as 'high'.
    """
    for tier in risk_tiers:
        if probability_of_default <= tier['max_pd']:
            return tier
    return risk_tiers[-1]


# Column-oriented store; key order fixes the DataFrame column order.
synthetic_dataset = {
    'customer_id': [],
    'credit_score': [],
    'risk_category': [],
    'limit_amount': [],
    'probability_of_default': [],
    'prob_on_time': [],
    'prob_30_late': [],
    'prob_60_late': [],
    'prob_90_late': [],
}

for customer_id in customer_ids:
    # Draw order (PD, then credit score, then limit) is kept stable so a seeded
    # run yields the same values as before.
    probability_of_default = random.uniform(0, 1)
    tier = classify_risk(probability_of_default)
    credit_score = random.randint(*tier['score_range'])
    limit_amount = random.randint(*tier['limit_range'])
    on_time, late_30, late_60, late_90 = tier['payment_probs']

    synthetic_dataset['customer_id'].append(customer_id)
    synthetic_dataset['credit_score'].append(credit_score)
    synthetic_dataset['risk_category'].append(tier['risk_category'])
    synthetic_dataset['limit_amount'].append(limit_amount)
    synthetic_dataset['probability_of_default'].append(probability_of_default)
    synthetic_dataset['prob_on_time'].append(on_time)
    synthetic_dataset['prob_30_late'].append(late_30)
    synthetic_dataset['prob_60_late'].append(late_60)
    synthetic_dataset['prob_90_late'].append(late_90)

df = pd.DataFrame(synthetic_dataset)

In [220]:
risk_category = df['risk_category'].tolist()

# Per-tier (lower, upper) bounds, listed in draw order:
# net cash-flow, credit utilization, debt-to-income, collateral value ratio.
tier_bounds = {
    'low': ((10000000, 50000000), (0.25, 0.5), (0.25, 0.4), (0.25, 5)),
    'medium': ((5000000, 10000000), (0.4, 0.7), (0.5, 0.6), (0.5, 0.75)),
    'high': ((500000, 1000000), (0.7, 1), (0.6, 0.9), (0.75, 1)),
}

netcashflow, credit_utilization_ratio = [], []
debt_to_income_ratio, collateral_value_ratio = [], []
metric_columns = (netcashflow, credit_utilization_ratio,
                  debt_to_income_ratio, collateral_value_ratio)

# The index label is used as a list position, so df is expected to carry
# the default 0..n-1 index here.
for index in df.index:
    bounds = tier_bounds.get(risk_category[index])
    if bounds is None:
        # Unknown category: nothing is drawn for this row.
        continue
    for values, (lower, upper) in zip(metric_columns, bounds):
        values.append(random.uniform(lower, upper))

df['netcashflow'] = netcashflow
df['credit_utilization_ratio'] = credit_utilization_ratio
df['debt_to_income_ratio'] = debt_to_income_ratio
df['collateral_value_ratio'] = collateral_value_ratio

In [221]:
# Capitalize the tier labels, then switch every column to its display name.
df['risk_category'] = df['risk_category'].map({'high':'High', 'medium':'Medium', 'low':'Low'})

# One rename is sufficient: a second rename keyed on the snake_case names would
# be a silent no-op, because those columns no longer exist after this call.
df = df.rename(columns={
    'customer_id': 'Customer_ID',
    'prob_on_time': 'On Time',
    'prob_30_late': 'DPD 30',
    'prob_60_late': 'DPD 60',
    'prob_90_late': 'DPD 90',
    'risk_category': 'Risk Category',
    'credit_score': 'Credit Score',
    'limit_amount': 'Limit Amount',
    'probability_of_default': 'Probability of Default',
    'netcashflow': 'Net Cash-flow',
    'credit_utilization_ratio': 'Credit Utilization',
    'debt_to_income_ratio': 'Debt to Income',
    'collateral_value_ratio': 'Loan to Value',
})

In [172]:
# df.to_csv('customer5.csv', index=False)

### 2. Demographics and KYC

In [222]:
customer_ids = np.arange(1, 1501)

# Category values and their sampling weights (each weight list sums to 1 and
# lines up position-by-position with its category list).
education_levels = ['Intermediate/A-level', 'Masters or Higher', 'Bachelors', 'Matric/O-level']
cities = ['Karachi', 'Lahore', 'Islamabad', 'Faisalabad', 'Multan']
marital_statuses = ['Married', 'Single', 'Widowed']
businesses = ['Food manufacturing', 'Cotton', 'Vegetable Oil and Ghee Mills', 'Crude Oil Refining', 'Casting of Iron & Steel', 'Mining & Minerals', 'Chemicals & Fertilizers', 'Food and Beverages']

# Pool of integer ages (normal draw, mean 40 / sd 5, truncated to int).
ages = np.random.normal(40, 5, size=1500).astype(int)
edu_frequencies = [0.3, 0.2, 0.4, 0.1]
city_frequencies = [0.3, 0.3, 0.2, 0.1, 0.1]
marital_frequencies = [0.5, 0.3, 0.2]
bus_frequencies = [0.2, 0.2, 0.2, 0.1, 0.1, 0.1, 0.05, 0.05]

kyc_rows = len(customer_ids)

# One vectorised draw per attribute. Fixes two defects of the per-row loop:
#   * the business draw referenced an undefined name (`business_frequencies`);
#   * marital status ignored `marital_frequencies` and was sampled uniformly.
df_kyc = pd.DataFrame({
    'Customer_ID': customer_ids,
    'Education Level': np.random.choice(education_levels, size=kyc_rows, p=edu_frequencies),
    'City': np.random.choice(cities, size=kyc_rows, p=city_frequencies),
    # Ages are resampled (with replacement) from the pool above.
    'Age': np.random.choice(ages, size=kyc_rows),
    'Marital Status': np.random.choice(marital_statuses, size=kyc_rows, p=marital_frequencies),
    'Business': np.random.choice(businesses, size=kyc_rows, p=bus_frequencies),
})

In [211]:
# df_kyc.to_csv('cust_kyc.csv', index=False)

# Merge scores and kyc. Both these tables contain unique Customer IDS and so we can safely merge them
# df.merge(df_kyc, left_on ='Customer_ID', right_on='Customer_ID').to_csv('cust_score.csv', index=False)

### 3. Collaterals

In [227]:
collateral_types = ['Real Estate', 'Vehicles', 'Precious Metals', 'Shares and Securities', 'Deposits with ABL']
# Sampling weight of each collateral type (same order as collateral_types).
num_collaterals_distribution = [0.15, 0.35, 0.35, 0.1, 0.05]
# Bounds passed to np.random.randint as (low, high).
collateral_value_range = (500000, 10000000)

data = []
for customer_id in range(1, 1501):
    # Each customer holds 3, 4 or 5 collaterals ...
    held = np.random.choice([3, 4, 5], p=[0.4, 0.4, 0.2])
    # ... of distinct types (replace=False) ...
    kinds = np.random.choice(
        collateral_types,
        size=held,
        replace=False,
        p=num_collaterals_distribution,
    )
    # ... each with its own randomly drawn value.
    low, high = collateral_value_range
    values = np.random.randint(low, high, size=held)
    data.extend((customer_id, kind, value) for kind, value in zip(kinds, values))

df_col = pd.DataFrame(data, columns=['Customer_ID', 'Collateral Type', 'Collateral Value'])

In [216]:
# df_col.to_csv('cust_col.csv', index = False)

### 4. Net Cashflows

In [247]:
# Monthly window: one row per customer per month start between the two dates.
start_date, end_date = datetime(2020, 1, 1), datetime(2023, 12, 31)
whole_years = end_date.year - start_date.year
extra_months = end_date.month - start_date.month
num_months = whole_years * 12 + extra_months + 1
date_range = pd.date_range(start=start_date, end=end_date, freq='MS')

num_customers = 1500
customer_ids = np.arange(num_customers) + 1

# For every customer draw the predicted series first, then the actual series
# (tuple elements are evaluated left to right, so the draw order is fixed).
cashflow_pairs = [
    (
        np.random.normal(loc=5000000, scale=100000, size=num_months),
        np.random.normal(loc=5000000, scale=100000, size=num_months),
    )
    for _ in range(num_customers)
]
monthly_predicted = [predicted for predicted, _ in cashflow_pairs]
monthly_actual = [actual for _, actual in cashflow_pairs]

# Long format: customer ids repeated per month, the month list tiled per customer.
df_trans = pd.DataFrame({
    'Customer_ID': np.repeat(customer_ids, num_months),
    'Date': np.tile(date_range, num_customers),
    'Net Cashflow Predicted': np.concatenate(monthly_predicted),
    'Net Cashflow Actual': np.concatenate(monthly_actual),
})

df_trans.to_csv('cust_trans.csv', index = False)

### 5. Final Data

In [9]:
# Reload every table from CSV files in the working directory. This rebinds
# df, df_trans, df_col and df_kyc, replacing the in-memory frames built above.
# NOTE(review): among the cells above only cust_trans.csv is actually written;
# the to_csv calls for cust_score.csv, cust_kyc.csv and cust_col.csv are
# commented out, and nothing visible here creates customer_filter.csv or
# trajectory.csv -- confirm these files exist before running on a fresh kernel.
df = pd.read_csv('cust_score.csv')
df_trans = pd.read_csv('cust_trans.csv')
df_col = pd.read_csv('cust_col.csv')
df_kyc = pd.read_csv('cust_kyc.csv')
df_filter = pd.read_csv('customer_filter.csv')
df_trajec = pd.read_csv('trajectory.csv')

In [None]:
# NOTE(review): df_list is not assigned anywhere above this cell; the only
# visible assignment is in the following cell, which also performs this same
# concat. On a fresh kernel this line would presumably raise NameError --
# confirm whether this cell is a stale leftover.
result_df = pd.concat(df_list, ignore_index=True)

In [None]:
# Small hand-written sample of the customer score table (five customers).
df = pd.DataFrame({
    'Customer_ID': [1, 2, 3, 4, 5],
    'Credit Score': [95, 35, 13, 92, 86],
    'Limit Amount': [37913841, 841451, 967638, 16510378, 15064371],
    'Probability of Default': [0.197477, 0.892358, 0.743595, 0.069763, 0.207578],
    'Credit Utilization': [0.315243, 0.781003, 0.802071, 0.483954, 0.295304],
    'Debt to Income': [0.256476, 0.799984, 0.776986, 0.274977, 0.355297],
    'Loan to Value': [2.016368, 0.793310, 0.854931, 4.500609, 0.288786]
})

# Replicate the sample once per day for six consecutive days starting
# 2023-06-01, tagging each copy with its date.
# The date is formatted as ISO year-month-day. The previous '%Y-%d-%m' pattern
# swapped day and month, so 1-6 June were written as '2023-01-06' ... '2023-06-06'.
snapshot_start = datetime(2023, 6, 1)
df_list = [
    df.assign(Date=(snapshot_start + timedelta(days=day_offset)).strftime('%Y-%m-%d'))
    for day_offset in range(6)
]

result_df = pd.concat(df_list, ignore_index=True)
result_df

#### Community Detection Visuals

In [3]:
import networkx as nx
from pyvis.network import Network
import pandas as pd
import numpy as np
import streamlit as st
import streamlit.components.v1 as components
# from model import run_community_detection, run_community_detection_v2
import plotly.express as px
import plotly.graph_objs as go
import json

In [7]:
# Raw string: the Windows path contains backslashes (\C, \d, \c) that are
# invalid escape sequences in a normal string literal. The resulting path is unchanged.
df = pd.read_csv(r'E:\Community Detection\data\community14.csv')

In [36]:
def visualize_network(community, colors_dict, shape_dict):
    """Render the supplier -> buyer edges of `community` as a pyvis network.

    Parameters
    ----------
    community : table-like with 'source' and 'target' columns
        One row per edge; 'source' values are drawn as suppliers and
        'target' values as buyers.
    colors_dict : dict
        Node colour per role; must contain the keys 'Supplier' and 'Buyer'.
    shape_dict : dict
        Node shape per role; must contain the keys 'Supplier' and 'Buyer'.

    Returns
    -------
    Whatever ``net.show("network.html")`` returns, after the graph has been
    written to ``network.html``.
    """
    net = Network(height="1000px", width="1500px", font_color="white", bgcolor='rgb(14, 17, 23)', select_menu=True, filter_menu=True,  cdn_resources='remote', notebook=True)

    # Edge weights are not used: every edge is added without a value.
    for src, dst in zip(community['source'], community['target']):
        # Titles are stored as text so the neighbour list can be appended
        # below even when the node ids are not strings.
        net.add_node(src, src, title=str(src), color=colors_dict['Supplier'], borderWidth=4, borderColor='black', shape=shape_dict['Supplier'])
        net.add_node(dst, dst, title=str(dst), color=colors_dict['Buyer'], borderWidth=4, borderColor='black', shape=shape_dict['Buyer'])
        net.add_edge(src, dst)

    # Extend each hover title with the node's neighbours and size the node
    # by its neighbour count.
    neighbor_map = net.get_adj_list()
    for node in net.nodes:
        neighbors = neighbor_map[node["id"]]
        node["title"] += " Neighbors: " + "\n".join(str(neighbor) for neighbor in neighbors)
        node["value"] = len(neighbors)

    net.show_buttons(filter_=['nodes', 'edges', 'physics'])
    # Render once. The earlier version rendered twice and also opened the
    # generated file without closing it or using its content.
    return net.show("network.html")

In [16]:
# Keep a single row per (target, source) pair and rebind df to the result.
df = df.drop_duplicates(subset = ['target', 'source'])

In [17]:
# Write the current df to community.csv in the working directory, without the index column.
df.to_csv("community.csv", index=False)

In [43]:
# Raw strings: the Windows paths contain backslashes (\C, \d, \c) that are
# invalid escape sequences in normal string literals. The paths themselves are unchanged.
df1 = pd.read_csv(r'E:\Community Detection\data\community14_works.csv')
df2 = pd.read_csv(r'E:\Community Detection\data\community14.csv')

In [44]:
# Node styling per role, looked up by visualize_network via the keys
# 'Supplier' and 'Buyer'.
colors_dict = dict(Buyer='#de1a24', Supplier='#1f77b4')
shape_dict = dict(Supplier='dot', Buyer='triangle')

In [None]:
# Display df1 with repeated (target, source) pairs removed. The result is not
# assigned, so df1 itself is left unchanged.
df1.drop_duplicates(subset=['target', 'source'])

In [55]:
# Draw the de-duplicated edges of df1 using the colour and shape mappings defined above.
visualize_network(df1.drop_duplicates(subset=['target', 'source']), colors_dict, shape_dict)