In [1]:
import pandas as pd
df = pd.read_csv("upi.csv")

In [3]:
df["transaction_time"] = pd.to_datetime(df["transaction_time"])

df = df.sort_values(by=["user_id", "transaction_time"])

In [4]:
df.head()

Unnamed: 0,transaction_id,user_id,transaction_amount,merchant_id,device_id,transaction_time,location,is_fraud,fraud_score
0,1,1000,793.07,533,5501,2023-01-01 02:56:23,Mumbai,0,0.073449
1,2,1000,988.64,778,6481,2023-01-01 15:21:28,Mumbai,0,0.07435
2,3,1000,931.95,421,5558,2023-01-02 05:27:36,Mumbai,0,0.098312
3,4,1000,766.52,434,6182,2023-01-02 07:22:01,Mumbai,0,0.260803
4,5,1000,1039.5,686,5478,2023-01-02 11:44:42,Mumbai,0,0.171231


In [21]:
grouped_features = []

In [22]:
for user_id, user_txns in df.groupby("user_id"):
    user_txns = user_txns.reset_index(drop=True)

    # Use all transactions except the last one for features
    history = user_txns.iloc[:-1]
    current = user_txns.iloc[-1]   # latest txn

    # Build feature dict
    time_diffs = history["transaction_time"].diff().dt.total_seconds()
    feature_dict = {
        "user_id": user_id,
        "transaction_id": current["transaction_id"],
        "current_amount": current["transaction_amount"],
        "mean_amount": history["transaction_amount"].mean(),
        "std_amount": history["transaction_amount"].std(),
        "max_amount": history["transaction_amount"].max(),
        "min_amount": history["transaction_amount"].min(),
        "unique_devices": history["device_id"].nunique(),
        "primary_device_ratio": (history["device_id"].value_counts().max() / len(history)) if len(history) > 0 else 1.0,
        "unique_locations": history["location"].nunique(),
        "location_switch_rate": ((history["location"] != history["location"].shift()).mean()) if len(history) > 1 else 0.0,
        "time_diff_mean": time_diffs.mean() if len(history) > 1 else 0.0,
        "time_diff_std": time_diffs.std() if len(history) > 1 else 0.0,
        "min_time_gap": time_diffs.min() if len(history) > 1 else 0.0,
        "txn_count": len(history),
        "is_fraud": int(user_txns["is_fraud"].max())
    }
   



    grouped_features.append(feature_dict)

In [23]:
grouped_df = pd.DataFrame(grouped_features)

In [27]:
grouped_df.head(5)

Unnamed: 0,user_id,transaction_id,current_amount,mean_amount,std_amount,max_amount,min_amount,unique_devices,primary_device_ratio,unique_locations,location_switch_rate,time_diff_mean,time_diff_std,min_time_gap,txn_count,is_fraud
0,1000,18,1017.81,924.394118,101.508138,1085.86,766.52,17,0.058824,1,0.058824,38374.0,26170.553529,3120.0,17,0
1,1001,38,2052.98,4913.773158,7701.504183,30345.0,1691.21,19,0.052632,4,0.684211,34300.888889,25936.73717,1979.0,19,1
2,1002,54,1396.9,2716.732667,4194.365383,17831.0,716.39,15,0.066667,2,0.2,38044.142857,23756.303254,657.0,15,1
3,1003,66,1039.15,4480.023636,7452.503497,24103.62,1210.12,11,0.090909,1,0.090909,38022.1,25891.816144,1761.0,11,1
4,1004,81,581.71,625.676429,130.210413,861.08,407.19,14,0.071429,1,0.071429,47364.461538,23709.709016,5547.0,14,0


In [12]:
grouped_df.shape

(2000, 12)

In [29]:
o=0
z=0
for x in grouped_df["is_fraud"]:
    if x==1:
        o = o+1
    else:
        z = z+1

print(o," ",z)

1097   903


In [13]:
grouped_df.to_csv("grouped_upi_dynamic.csv", index=False)