In [1]:
# Import modules
import os
import pandas as pd
from datetime import datetime

In [2]:
# Set feature repository path
repo_path = '/Users/shubhadeepdas/Documents/data_science/projects/genai_transcript/output/feature_repository'
os.makedirs(repo_path, exist_ok=True)

In [3]:
# Define existing model features with simulated importance scores and descriptions
existing_features = [
    {
        'feature_id': 'feat_1',
        'feature_name': 'cust_day_since_last_failed_login',
        'description': 'Days since the customer’s last failed login attempt, indicating potential unauthorized access attempts.',
        'required_raw_variables': 'digital_code,event_date,event_time',
        'importance_score': 0.85,  # High relevance to ATO
        'created_timestamp': datetime.now()
    },
    {
        'feature_id': 'feat_2',
        'feature_name': 'card_cnt_tran_mcc_7d',
        'description': 'Count of card transactions in specific merchant category codes over the last 7 days.',
        'required_raw_variables': 'transaction_id,transaction_date,mcc',
        'importance_score': 0.70,  # Moderate, pattern detection
        'created_timestamp': datetime.now()
    },
    {
        'feature_id': 'feat_3',
        'feature_name': 'cust_cnt_tran_pos_30d',
        'description': 'Count of customer point-of-sale transactions over the last 30 days.',
        'required_raw_variables': 'transaction_id,transaction_date,pos',
        'importance_score': 0.65,  # Moderate, spending behavior
        'created_timestamp': datetime.now()
    },
    {
        'feature_id': 'feat_4',
        'feature_name': 'card_ratio_mcc_1d',
        'description': 'Ratio of transactions in specific merchant category codes over the last 1 day.',
        'required_raw_variables': 'transaction_id,transaction_date,mcc',
        'importance_score': 0.60,  # Moderate, sensitive to changes
        'created_timestamp': datetime.now()
    },
    {
        'feature_id': 'feat_5',
        'feature_name': 'card_cnt_tran_night_90d',
        'description': 'Count of card transactions during nighttime hours over the last 90 days.',
        'required_raw_variables': 'transaction_id,transaction_date,transaction_time',
        'importance_score': 0.55,  # Lower, situational
        'created_timestamp': datetime.now()
    },
    {
        'feature_id': 'feat_6',
        'feature_name': 'cust_day_since_last_mid',
        'description': 'Days since the customer’s last merchant-initiated transaction.',
        'required_raw_variables': 'transaction_id,transaction_date',
        'importance_score': 0.75,  # High, unusual activity
        'created_timestamp': datetime.now()
    },
    {
        'feature_id': 'feat_7',
        'feature_name': 'card_cnt_tran_secure_12h',
        'description': 'Count of secure card transactions (e.g., 3D Secure) in the last 12 hours.',
        'required_raw_variables': 'transaction_id,transaction_date,transaction_time,secure_flag',
        'importance_score': 0.80,  # High, fraud detection
        'created_timestamp': datetime.now()
    },
    {
        'feature_id': 'feat_8',
        'feature_name': 'card_normed_amt_30d',
        'description': 'Normalized transaction amount for the card over the last 30 days.',
        'required_raw_variables': 'transaction_id,transaction_date,transaction_amt',
        'importance_score': 0.70,  # Moderate, spending anomalies
        'created_timestamp': datetime.now()
    }
]

In [4]:
# Create DataFrame for features
features_df = pd.DataFrame(existing_features)
features_df

Unnamed: 0,feature_id,feature_name,description,required_raw_variables,importance_score,created_timestamp
0,feat_1,cust_day_since_last_failed_login,Days since the customer’s last failed login at...,"digital_code,event_date,event_time",0.85,2025-08-01 21:09:22.953135
1,feat_2,card_cnt_tran_mcc_7d,Count of card transactions in specific merchan...,"transaction_id,transaction_date,mcc",0.7,2025-08-01 21:09:22.953144
2,feat_3,cust_cnt_tran_pos_30d,Count of customer point-of-sale transactions o...,"transaction_id,transaction_date,pos",0.65,2025-08-01 21:09:22.953145
3,feat_4,card_ratio_mcc_1d,Ratio of transactions in specific merchant cat...,"transaction_id,transaction_date,mcc",0.6,2025-08-01 21:09:22.953146
4,feat_5,card_cnt_tran_night_90d,Count of card transactions during nighttime ho...,"transaction_id,transaction_date,transaction_time",0.55,2025-08-01 21:09:22.953147
5,feat_6,cust_day_since_last_mid,Days since the customer’s last merchant-initia...,"transaction_id,transaction_date",0.75,2025-08-01 21:09:22.953148
6,feat_7,card_cnt_tran_secure_12h,"Count of secure card transactions (e.g., 3D Se...","transaction_id,transaction_date,transaction_ti...",0.8,2025-08-01 21:09:22.953149
7,feat_8,card_normed_amt_30d,Normalized transaction amount for the card ove...,"transaction_id,transaction_date,transaction_amt",0.7,2025-08-01 21:09:22.953149


In [5]:
# Save to Parquet and CSV for latest features
latest_parquet_path = os.path.join(repo_path, 'features_latest.parquet')
latest_csv_path = os.path.join(repo_path, 'features_latest.csv')
features_df.to_parquet(latest_parquet_path, index=False)
features_df.to_csv(latest_csv_path, index=False)

In [6]:
# Save to Parquet and CSV for version history
history_parquet_path = os.path.join(repo_path, 'features_history.parquet')
history_csv_path = os.path.join(repo_path, 'features_history.csv')
if os.path.exists(history_parquet_path):
    history_df = pd.read_parquet(history_parquet_path)
    features_df = pd.concat([history_df, features_df], ignore_index=True)
features_df.to_parquet(history_parquet_path, index=False)
features_df.to_csv(history_csv_path, index=False)

In [7]:
# Print confirmation
print("Feature Repository Initialized at:", repo_path)
print("\nExisting Features Metadata (Latest):")
features_df

Feature Repository Initialized at: /Users/shubhadeepdas/Documents/data_science/projects/genai_transcript/output/feature_repository

Existing Features Metadata (Latest):


Unnamed: 0,feature_id,feature_name,description,required_raw_variables,importance_score,created_timestamp
0,feat_1,cust_day_since_last_failed_login,Days since the customer’s last failed login at...,"digital_code,event_date,event_time",0.85,2025-08-01 21:09:22.953135
1,feat_2,card_cnt_tran_mcc_7d,Count of card transactions in specific merchan...,"transaction_id,transaction_date,mcc",0.7,2025-08-01 21:09:22.953144
2,feat_3,cust_cnt_tran_pos_30d,Count of customer point-of-sale transactions o...,"transaction_id,transaction_date,pos",0.65,2025-08-01 21:09:22.953145
3,feat_4,card_ratio_mcc_1d,Ratio of transactions in specific merchant cat...,"transaction_id,transaction_date,mcc",0.6,2025-08-01 21:09:22.953146
4,feat_5,card_cnt_tran_night_90d,Count of card transactions during nighttime ho...,"transaction_id,transaction_date,transaction_time",0.55,2025-08-01 21:09:22.953147
5,feat_6,cust_day_since_last_mid,Days since the customer’s last merchant-initia...,"transaction_id,transaction_date",0.75,2025-08-01 21:09:22.953148
6,feat_7,card_cnt_tran_secure_12h,"Count of secure card transactions (e.g., 3D Se...","transaction_id,transaction_date,transaction_ti...",0.8,2025-08-01 21:09:22.953149
7,feat_8,card_normed_amt_30d,Normalized transaction amount for the card ove...,"transaction_id,transaction_date,transaction_amt",0.7,2025-08-01 21:09:22.953149
