In [1]:
import pandas as pd
df = pd.read_csv("upi.csv")

In [2]:
df["transaction_time"] = pd.to_datetime(df["transaction_time"])

df = df.sort_values(by=["user_id", "transaction_time"])

In [None]:
print(df.head(10))

   transaction_id  user_id  transaction_amount  merchant_id  device_id  \
0               1     1000             1148.81          472       6706   
1               2     1000              845.45          767       6430   
2               3     1000             1164.36          570       5226   
3               4     1000              957.18          649       6138   
4               5     1000             1188.94          227       5183   
5               6     1000             1096.70          443       6394   
6               7     1000             1066.73          282       6517   
7               8     1000             1155.28          995       6719   
8               9     1000              939.56          671       5459   
9              10     1000              970.81          691       6957   

     transaction_time location  is_fraud  fraud_score  
0 2023-01-01 20:58:56  Chennai         0     0.187022  
1 2023-01-02 09:36:33  Chennai         0     0.215825  
2 2023-01-02 16:0

In [4]:
grouped_features = []

In [5]:
for user_id, user_txns in df.groupby("user_id"):
    user_txns = user_txns.reset_index(drop=True)

    # Use all transactions except the last one for features
    history = user_txns.iloc[:-1]
    current = user_txns.iloc[-1]   # latest txn

    # Build feature dict
    time_diffs = history["transaction_time"].diff().dt.total_seconds()
    feature_dict = {
        "user_id": user_id,
        "transaction_id": current["transaction_id"],
        "current_amount": current["transaction_amount"],
        "mean_amount": history["transaction_amount"].mean(),
        "std_amount": history["transaction_amount"].std(),
        "max_amount": history["transaction_amount"].max(),
        "min_amount": history["transaction_amount"].min(),
        "unique_devices": history["device_id"].nunique(),
        "primary_device_ratio": (history["device_id"].value_counts().max() / len(history)) if len(history) > 0 else 1.0,
        "unique_locations": history["location"].nunique(),
        "location_switch_rate": ((history["location"] != history["location"].shift()).mean()) if len(history) > 1 else 0.0,
        "time_diff_mean": time_diffs.mean() if len(history) > 1 else 0.0,
        "time_diff_std": time_diffs.std() if len(history) > 1 else 0.0,
        "min_time_gap": time_diffs.min() if len(history) > 1 else 0.0,
        "txn_count": len(history),
        "is_fraud": int(user_txns["is_fraud"].max())
    }
   



    grouped_features.append(feature_dict)

In [6]:
grouped_df = pd.DataFrame(grouped_features)

In [7]:
grouped_df.head(5)

Unnamed: 0,user_id,transaction_id,current_amount,mean_amount,std_amount,max_amount,min_amount,unique_devices,primary_device_ratio,unique_locations,location_switch_rate,time_diff_mean,time_diff_std,min_time_gap,txn_count,is_fraud
0,1000,19,783.83,1551.085556,2333.778202,10883.86,741.77,18,0.055556,1,0.055556,49584.647059,23340.77032,5052.0,18,1
1,1001,32,1891.62,1938.394167,99.750317,2097.52,1746.4,12,0.083333,1,0.083333,36834.090909,22294.896503,5958.0,12,0
2,1002,51,1603.78,1540.666667,253.946267,2028.6,1128.81,18,0.055556,1,0.055556,42734.764706,28187.013973,2522.0,18,0
3,1003,62,1133.21,1274.256,186.975195,1494.3,933.85,10,0.1,1,0.1,49742.333333,16793.639198,25476.0,10,0
4,1004,75,746.37,999.23,1635.026775,6122.89,10.0,12,0.083333,2,0.25,29171.363636,12017.809977,4982.0,12,1


In [8]:
grouped_df.shape

(5000, 16)

In [9]:
o=0
z=0
for x in grouped_df["is_fraud"]:
    if x==1:
        o = o+1
    else:
        z = z+1

print(o," ",z)

715   4285


In [10]:
grouped_df.to_csv("grouped_upi_dynamic.csv", index=False)