# In [None]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

def load_data(json_file):
    """Read a JSON document from *json_file* and return the parsed object.

    Args:
        json_file: Path (str or path-like) of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the contents are not valid JSON.
    """
    # Decode explicitly as UTF-8: without it, open() falls back to the
    # platform locale encoding, which breaks on non-ASCII content on
    # systems whose default codec is not UTF-8.
    with open(json_file, 'r', encoding='utf-8') as f:
        return json.load(f)

def extract_from_to_data(data):
    """Flatten the per-address statistics in *data* into one row per from/to pair.

    For every entry in ``data['topAddresses']`` and every item of that
    entry's ``fromAddresses`` mapping, one row is produced holding the
    sender, the receiver (the entry's ``address``) and the values read from
    the item's top-level, ``profitStats`` and ``flagStats`` fields.

    Args:
        data: Parsed analysis document. Must contain ``topAddresses``; each
            entry must contain ``address`` and ``fromAddresses``.

    Returns:
        A ``pandas.DataFrame`` with a fixed column order. When there are no
        from/to pairs the frame is empty but still carries every column, so
        a CSV written from it keeps its header and column lookups still work.

    Raises:
        KeyError: If an expected key is missing.
        ValueError: If a value passed through ``float()`` is not convertible.
    """
    # Output column -> key inside stats['profitStats'], converted with float().
    float_profit_fields = {
        'total_profit': 'totalProfit',
        'average_profit': 'averageProfit',
        'total_gas_cost': 'totalGasCost',
        'average_gas_cost': 'averageGasCost',
    }
    # Output column -> key inside stats['profitStats'], copied unchanged.
    raw_profit_fields = {
        'total_gas_used': 'totalGasUsed',
        'average_gas_used': 'averageGasUsed',
    }
    # Output column -> key inside stats['flagStats'], copied unchanged.
    flag_fields = {
        'pools_match': 'poolsMatch',
        'tokens_match': 'tokensMatch',
        'amounts_match': 'amountsMatch',
        'pools_and_tokens_match': 'poolsAndTokensMatch',
        'pools_and_amounts_match': 'poolsAndAmountsMatch',
        'tokens_and_amounts_match': 'tokensAndAmountsMatch',
        'all_match': 'allMatch',
        'all_not_match': 'allNotMatch',
    }
    columns = [
        'from_address',
        'to_address',
        'total_transactions',
        'arbitrage_count',
        'arbitrage_rate',
        *float_profit_fields,
        *raw_profit_fields,
        *flag_fields,
    ]

    rows = []
    for address_info in data['topAddresses']:
        to_address = address_info['address']
        for from_address, stats in address_info['fromAddresses'].items():
            profit_stats = stats['profitStats']
            flag_stats = stats['flagStats']
            row = {
                'from_address': from_address,
                'to_address': to_address,
                'total_transactions': stats['totalTransactions'],
                'arbitrage_count': stats['arbitrageCount'],
                'arbitrage_rate': float(stats['arbitrageRate']),
            }
            row.update(
                (column, float(profit_stats[key]))
                for column, key in float_profit_fields.items()
            )
            row.update(
                (column, profit_stats[key])
                for column, key in raw_profit_fields.items()
            )
            row.update(
                (column, flag_stats[key])
                for column, key in flag_fields.items()
            )
            rows.append(row)

    # Passing the columns explicitly keeps the schema even when rows is empty.
    return pd.DataFrame(rows, columns=columns)

def create_visualizations(df, output_dir):
    """Render summary charts for *df* as PNG files inside *output_dir*.

    Four files are written: a 50-bin histogram each for ``total_profit``,
    ``average_gas_used`` and ``arbitrage_rate``, and a scatter plot of
    ``arbitrage_rate`` against ``total_profit``.

    Args:
        df: DataFrame containing at least the three columns named above.
        output_dir: Destination directory (str or path-like). It is created,
            including missing parents, when it does not exist.
    """
    # Prepare the output directory.
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Chart style. matplotlib 3.6 renamed its bundled seaborn style sheets
    # to "seaborn-v0_8*" and later releases dropped the old names, so using
    # 'seaborn' unconditionally raises there. Prefer the new name and fall
    # back to the old one; if neither exists the current style is kept.
    for style_name in ('seaborn-v0_8', 'seaborn'):
        if style_name in plt.style.available:
            plt.style.use(style_name)
            break

    # 1-3. Distribution histograms: (column, axis label, output file name).
    histograms = (
        ('total_profit', 'Total Profit', 'total_profit_distribution.png'),
        ('average_gas_used', 'Average Gas Used',
         'average_gas_used_distribution.png'),
        ('arbitrage_rate', 'Arbitrage Rate',
         'arbitrage_rate_distribution.png'),
    )
    for column, label, filename in histograms:
        plt.figure(figsize=(12, 6))
        sns.histplot(data=df, x=column, bins=50)
        plt.title(f'{label} Distribution')
        plt.xlabel(label)
        plt.ylabel('Count')
        plt.savefig(output_dir / filename)
        # Close each figure so pyplot does not keep it open after saving.
        plt.close()

    # 4. Scatter plot: Arbitrage Rate vs Total Profit.
    plt.figure(figsize=(12, 6))
    sns.scatterplot(data=df, x='arbitrage_rate', y='total_profit')
    plt.title('Arbitrage Rate vs Total Profit')
    plt.xlabel('Arbitrage Rate')
    plt.ylabel('Total Profit')
    plt.savefig(output_dir / 'arbitrage_rate_vs_profit.png')
    plt.close()

def main():
    """Run the pipeline: load the analysis JSON, export CSV, write charts.

    All paths are fixed and relative to the current working directory.
    """
    # Input and output locations.
    source_json = '../data/arbitrage_analysis_full/inter_analysis.json'
    csv_path = '../data/arbitrage_analysis_full/arbitrage_analysis.csv'
    figures_dir = '../data/arbitrage_analysis_full/visualizations'

    # Load the raw document and flatten it into a DataFrame.
    frame = extract_from_to_data(load_data(source_json))

    # Persist the table as CSV, without the index column.
    frame.to_csv(csv_path, index=False)
    print(f"CSV文件已保存到: {csv_path}")

    # Render the charts.
    create_visualizations(frame, figures_dir)
    print(f"可视化图表已保存到: {figures_dir}")

if __name__ == "__main__":