In [1]:
# --- 1. SETUP & FUNCTION DEFINITIONS ---

# Import libraries
import pandas as pd
import numpy as np
import requests
import time
from datetime import datetime, timezone
import collections
import joblib
import os
import warnings

# Suppress warnings for a clean output
warnings.filterwarnings('ignore', category=UserWarning)

# IMPORTANT: This function is required for joblib to load the pipeline
def cyclical_encoder(X):
    """Encode each column of ``X`` as a sine/cosine pair.

    Column ``i`` is mapped onto a circle of period ``max_vals[i] + 1`` and
    replaced by two columns, ``sin(2*pi*x/period)`` and ``cos(2*pi*x/period)``.

    Args:
        X: 2-D NumPy array with at most ``len(max_vals)`` (three) columns.

    Returns:
        Array of shape ``(n_rows, 2 * n_cols)`` laid out as
        ``[sin_0, cos_0, sin_1, cos_1, ...]``. When ``X`` has no columns an
        empty 1-D array is returned (legacy behaviour, kept for
        compatibility).

    Raises:
        IndexError: if ``X`` has more columns than ``max_vals`` has entries.
    """
    # NOTE(review): these look like the maxima of hour / weekday / month.
    # They must not be changed independently of the saved pipeline, which
    # references this function -- confirm against the training notebook.
    max_vals = [23, 6, 12]

    # Collect the pieces and concatenate once. The previous implementation
    # used "accumulator.size == 0" as its "first iteration" test, which is
    # also true for zero-row input and made it drop all but the last column.
    encoded_parts = []
    for col_idx in range(X.shape[1]):
        period = max_vals[col_idx] + 1
        column = X[:, col_idx:col_idx + 1]  # slice keeps the array 2-D
        encoded_parts.append(np.sin(2 * np.pi * column / period))
        encoded_parts.append(np.cos(2 * np.pi * column / period))

    if not encoded_parts:
        return np.array([])
    return np.concatenate(encoded_parts, axis=1)

# --- Blockchain Data Fetching Functions ---
# Base URL of the fullnode REST API; the fetch helpers append
# "/accounts/{address}/..." paths to it.
NODE_URL = "https://fullnode.mainnet.aptoslabs.com/v1"

def get_all_transactions(session, address, timeout=30):
    """Fetch the transactions the node lists for ``address``, page by page.

    Pages of up to 100 transactions are requested starting at offset 0 until
    an empty or short page is returned. Fetching is best-effort: if a request
    fails, a warning is printed and the transactions collected so far are
    returned.

    Args:
        session: ``requests.Session`` (or compatible) used for the HTTP calls.
        address: Account address inserted into the request URL.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        List of transaction dicts in the order the node returned them;
        empty if nothing could be fetched.
    """
    all_transactions = []
    start = 0
    limit = 100
    url = f"{NODE_URL}/accounts/{address}/transactions"
    while True:
        params = {'start': start, 'limit': limit}
        try:
            # Without an explicit timeout a stalled connection blocks forever.
            response = session.get(url, params=params, timeout=timeout)
            response.raise_for_status()
            transactions = response.json()
        except (requests.exceptions.RequestException, ValueError) as exc:
            # ValueError covers a non-JSON body on older requests versions.
            # Keep the partial result, but make the truncation visible.
            print(f"⚠️ WARNING: Stopped fetching transactions for {address} "
                  f"at offset {start}: {exc}")
            break
        if not transactions:
            break
        all_transactions.extend(transactions)
        start += len(transactions)
        if len(transactions) < limit:
            break  # short page: nothing left to fetch
        time.sleep(0.1)  # brief pause between pages
    return all_transactions

def get_wallet_resources(session, address, timeout=30):
    """Fetch the resource list of ``address`` from the node.

    Args:
        session: ``requests.Session`` (or compatible) used for the HTTP call.
        address: Account address inserted into the request URL.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON response, or an empty list if the request failed
        (a warning is printed in that case).
    """
    try:
        # Without an explicit timeout a stalled connection blocks forever.
        response = session.get(f"{NODE_URL}/accounts/{address}/resources", timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as exc:
        # ValueError covers a non-JSON body on older requests versions.
        print(f"⚠️ WARNING: Could not fetch resources for {address}: {exc}")
        return []

def _build_model_input(profile_dict, first_tx_datetime):
    """Return the one-row model DataFrame: base features plus derived columns."""
    frame = pd.DataFrame([profile_dict])

    # Calendar fields of the first transaction; -1 when there is none,
    # matching the "unknown" sentinel already used in the base features.
    if first_tx_datetime is None:
        frame['tx_day_of_week'] = -1
        frame['tx_month'] = -1
        frame['tx_day_of_month'] = -1
    else:
        frame['tx_day_of_week'] = first_tx_datetime.weekday()
        frame['tx_month'] = first_tx_datetime.month
        frame['tx_day_of_month'] = first_tx_datetime.day

    # The small epsilon keeps the ratios defined for a wallet with 0 transactions.
    denominator = profile_dict['total_transaction_count'] + 1e-6
    frame['success_rate'] = profile_dict['successful_transaction_count'] / denominator
    frame['new_contract_rate'] = profile_dict['unique_interacted_contracts'] / denominator
    frame['balance_per_tx'] = profile_dict['apt_balance'] / denominator
    return frame

# UPGRADE: returns both a DataFrame for the model and a dictionary for logging.
def create_wallet_data(session, address):
    """Fetch on-chain data for ``address`` and derive the wallet's features.

    Args:
        session: HTTP session handed to the fetch helpers.
        address: Wallet address to profile.

    Returns:
        A ``(model_input_df, profile_dict)`` tuple. ``profile_dict`` holds the
        base features; ``label`` is left at -1 for the caller to fill in
        after prediction. ``model_input_df`` is a one-row DataFrame with the
        same columns plus ``tx_day_of_week``, ``tx_month``,
        ``tx_day_of_month``, ``success_rate``, ``new_contract_rate`` and
        ``balance_per_tx``. Both have the same layout whether or not the
        wallet has transactions.
    """
    all_transactions = get_all_transactions(session, address)
    resources = get_wallet_resources(session, address)

    # Default values; returned unchanged when the wallet has no transactions.
    profile_dict = {
        'wallet_address': address, 'label': -1,  # label is updated after prediction
        'first_transaction_date': None, 'wallet_age_days': 0, 'apt_balance': 0,
        'other_token_count': 0, 'total_transaction_count': 0, 'successful_transaction_count': 0,
        'failed_transaction_count': 0, 'unique_interacted_contracts': 0,
        'unique_interacted_addresses': 0, 'avg_time_between_tx_seconds': -1,
        'std_dev_time_between_tx_seconds': -1, 'most_active_hour': -1,
        'most_active_weekday': "N/A", 'is_self_funded': 1
    }

    if not all_transactions:
        print(f"⚠️ WARNING: No transactions found for wallet {address}")
        # Same column layout as the normal path so the model input is consistent.
        return _build_model_input(profile_dict, None), profile_dict

    # --- Transaction counts ---
    total_count = len(all_transactions)
    success_count = sum(1 for tx in all_transactions if tx.get('success'))
    profile_dict['total_transaction_count'] = total_count
    profile_dict['successful_transaction_count'] = success_count
    profile_dict['failed_transaction_count'] = total_count - success_count

    # --- First transaction / wallet age ---
    # Sort chronologically here instead of trusting the fetch order: pages are
    # requested from offset 0 upwards, so the last list element is not the
    # oldest transaction. Timestamps are in microseconds.
    txs_by_time = sorted(all_transactions, key=lambda tx: int(tx['timestamp']))
    first_tx = txs_by_time[0]
    timestamps = [int(tx['timestamp']) // 1000000 for tx in txs_by_time]
    creation_datetime = datetime.fromtimestamp(timestamps[0], tz=timezone.utc)
    profile_dict['first_transaction_date'] = creation_datetime.strftime('%Y-%m-%d %H:%M:%S UTC')
    profile_dict['wallet_age_days'] = (datetime.now(timezone.utc) - creation_datetime).days

    # --- Timing statistics (need at least two transactions) ---
    if len(timestamps) > 1:
        time_diffs = np.diff(timestamps)
        profile_dict['avg_time_between_tx_seconds'] = float(np.mean(time_diffs))
        profile_dict['std_dev_time_between_tx_seconds'] = float(np.std(time_diffs))

    # --- Most active hour / weekday (UTC) ---
    tx_datetimes = [datetime.fromtimestamp(ts, tz=timezone.utc) for ts in timestamps]
    hour_counts = collections.Counter(dt.hour for dt in tx_datetimes)
    weekday_counts = collections.Counter(dt.strftime('%A') for dt in tx_datetimes)
    profile_dict['most_active_hour'] = hour_counts.most_common(1)[0][0]
    profile_dict['most_active_weekday'] = weekday_counts.most_common(1)[0][0]

    if first_tx.get('sender') != address:
        profile_dict['is_self_funded'] = 0

    # --- Balances from the account resources ---
    apt_balance = 0
    other_token_count = 0
    for resource in resources:
        if resource['type'] == '0x1::coin::CoinStore<0x1::aptos_coin::AptosCoin>':
            apt_balance = int(resource['data']['coin']['value']) / 10**8
        elif resource['type'].startswith('0x1::coin::CoinStore<'):
            other_token_count += 1
    profile_dict['apt_balance'] = apt_balance
    profile_dict['other_token_count'] = other_token_count

    # --- Contracts and addresses referenced by the transaction payloads ---
    interacted_contracts = set()
    interacted_addresses = set()
    for tx in all_transactions:
        payload = tx.get('payload') or {}
        function_id = payload.get('function')
        if function_id:
            # Part before the first "::" of the function identifier.
            interacted_contracts.add(function_id.split('::')[0])
        for arg in payload.get('arguments') or []:
            # Heuristic: long "0x..." string arguments are counted as addresses.
            if isinstance(arg, str) and arg.startswith('0x') and len(arg) > 40:
                interacted_addresses.add(arg)
    profile_dict['unique_interacted_contracts'] = len(interacted_contracts)
    profile_dict['unique_interacted_addresses'] = len(interacted_addresses - {address})

    # DataFrame for the model: base features plus the derived columns.
    model_input_df = _build_model_input(profile_dict, creation_datetime)

    return model_input_df, profile_dict

print("✅ All necessary functions are defined and ready.")

✅ All necessary functions are defined and ready.


In [2]:
# --- 2. LOAD THE PRE-TRAINED AI PIPELINE ---

# Location of the serialized pipeline file.
PIPELINE_PATH = 'C:/A-A-C/models/aptos_pro_pipeline.joblib'

# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load pipeline files from a trusted source.
try:
    loaded_pipeline = joblib.load(PIPELINE_PATH)
except FileNotFoundError:
    # Leave an explicit None so the prediction cell can detect the failure.
    loaded_pipeline = None
    print(f"❌ ERROR: Pipeline file not found at '{PIPELINE_PATH}'.")
    print("   Please run the 'train_aptos.ipynb' notebook first to create the pipeline file.")
else:
    print("✅ AI Pipeline loaded successfully.")
    print("   You can now proceed to Cell 3 to predict any wallet.")

✅ AI Pipeline loaded successfully.
   You can now proceed to Cell 3 to predict any wallet.


In [3]:
# --- 3. LIVE WALLET PREDICTOR & LOGGER ---

# ==================================================================
# ==> INPUT THE WALLET ADDRESS YOU WANT TO ANALYZE HERE <==
# ==================================================================
# Stray characters after the address were removed; .strip() additionally
# guards against whitespace picked up when pasting an address.
wallet_to_predict = "0x983fe110ab9a83bd43da49260d064017af8527826957e7ebd057ba50a7b2458e".strip()
# ==================================================================
# ==> DEFINE THE LOG FILE PATH <==
# ==================================================================
LOG_CSV_PATH = 'C:/A-A-C/data/raw/prediction_log.csv'
# ==================================================================


# The pipeline is None (or undefined) when the loading cell failed or was not run.
if globals().get('loaded_pipeline') is not None:
    print(f"--- 1. Fetching live data for wallet: {wallet_to_predict[:15]}... ---")

    # Get both the DataFrame for the model and the dictionary for logging.
    # The context manager closes the session's connections when done.
    with requests.Session() as session:
        model_input_df, log_data_dict = create_wallet_data(session, wallet_to_predict)

    print("\n--- 2. Running prediction ---")
    prediction = loaded_pipeline.predict(model_input_df)

    # --- 3. Display simple result on screen ---
    result_label = "SYBIL (Suspicious)" if prediction[0] == 1 else "NORMAL (Likely a regular user)"
    print("\n" + "="*30)
    print("--- PREDICTION RESULT ---")
    print(f"  Wallet: {wallet_to_predict}")
    print(f"  Result: {result_label}")
    print("="*30)

    # --- 4. Prepare and save detailed data to log file ---
    # Store the predicted label in the dictionary.
    log_data_dict['label'] = prediction[0]

    # Exactly the columns that are written to the log, in this order.
    columns_to_log = [
        'wallet_address', 'label', 'first_transaction_date', 'wallet_age_days',
        'apt_balance', 'other_token_count', 'total_transaction_count',
        'successful_transaction_count', 'failed_transaction_count',
        'unique_interacted_contracts', 'unique_interacted_addresses',
        'avg_time_between_tx_seconds', 'std_dev_time_between_tx_seconds',
        'most_active_hour', 'most_active_weekday', 'is_self_funded'
    ]

    # One-row DataFrame restricted to the logged columns.
    log_df = pd.DataFrame([log_data_dict])[columns_to_log]

    try:
        # Create the log directory on first use instead of failing.
        log_dir = os.path.dirname(LOG_CSV_PATH)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)
        file_exists = os.path.isfile(LOG_CSV_PATH)
        # Append mode; the header is written only when the file is new.
        log_df.to_csv(LOG_CSV_PATH, mode='a', header=not file_exists, index=False)
        print(f"\n✅ Successfully logged detailed features to: {LOG_CSV_PATH}")
    except Exception as e:
        # Logging is best-effort: report the problem but keep the prediction.
        print(f"\n❌ ERROR: Could not write to log file. Details: {e}")

else:
    print("\n❌ Cannot proceed. Please run Cell 2 to load the AI pipeline first.")

--- 1. Fetching live data for wallet: 0x983fe110ab9a8... ---

--- 2. Running prediction ---

--- PREDICTION RESULT ---
  Wallet: 0x983fe110ab9a83bd43da49260d064017af8527826957e7ebd057ba50a7b2458e
  Result: SYBIL (Suspicious)

✅ Successfully logged detailed features to: C:/A-A-C/data/raw/prediction_log.csv
