# Smart Contract Vulnerability Detection Dataset Analysis

This notebook provides a comprehensive analysis of the smart contract vulnerability detection dataset. The dataset contains various types of vulnerabilities including:
- Overflow-Underflow
- Re-entrancy
- Timestamp-Dependency
- TOD (Transaction-Ordering Dependence)
- tx.origin
- Unchecked-Send
- Unhandled-Exceptions

Each vulnerability category contains Solidity smart contract files (.sol) and corresponding bug log CSV files that track vulnerability locations and characteristics.

## 1. Import Required Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob
from collections import Counter, defaultdict
import warnings
warnings.filterwarnings('ignore')

# Set plot style.
# The seaborn style sheets were renamed to 'seaborn-v0_8*' in matplotlib 3.6;
# older releases only know the sheet as 'seaborn'. Pick whichever name this
# installation provides instead of failing on an unknown style.
plot_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(plot_style)
sns.set_palette("husl")

# Configure matplotlib defaults used by every figure below
plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['font.size'] = 10

print("Libraries imported successfully!")
print("Working directory:", os.getcwd())

## 2. Load Dataset from Folder

In [None]:
# Define dataset paths
dataset_path = './dataset'
buggy_contracts_path = os.path.join(dataset_path, 'buggy_contracts')

# Get all vulnerability categories: every sub-directory of buggy_contracts is
# treated as one category. os.listdir() returns entries in arbitrary order, so
# the names are sorted to keep the numbering printed below (and the row order
# of everything derived from this list) reproducible across machines.
vulnerability_types = sorted(
    d for d in os.listdir(buggy_contracts_path)
    if os.path.isdir(os.path.join(buggy_contracts_path, d))
)

print("Vulnerability types found:")
for i, vuln_type in enumerate(vulnerability_types, 1):
    print(f"{i}. {vuln_type}")

print(f"\nTotal vulnerability categories: {len(vulnerability_types)}")

# Function to load all CSV bug logs
def load_bug_logs():
    """Gather every ``BugLog_*.csv`` of every vulnerability category into one DataFrame.

    Each CSV is read with pandas and tagged with two extra columns:
    ``vulnerability_type`` (the category directory it was found in) and
    ``file_name`` (the CSV's base name). A file that cannot be loaded is
    reported on stdout and skipped.

    Returns:
        The row-wise concatenation of all loaded logs with a fresh index, or an
        empty DataFrame when no log could be loaded.
    """
    frames = []

    for category in vulnerability_types:
        pattern = os.path.join(buggy_contracts_path, category, 'BugLog_*.csv')

        for log_path in glob.glob(pattern):
            try:
                log_frame = pd.read_csv(log_path)
                log_frame['vulnerability_type'] = category
                log_frame['file_name'] = os.path.basename(log_path)
                frames.append(log_frame)
            except Exception as e:
                print(f"Error loading {log_path}: {e}")

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

# Function to get contract file information
def get_contract_info():
    """Collect size metrics for every Solidity file of every vulnerability category.

    Each ``*.sol`` file found directly inside a category directory is read as
    UTF-8 text. A file that cannot be processed is reported on stdout and
    skipped.

    Returns:
        A DataFrame with one row per contract and the columns ``file_path``,
        ``file_name``, ``vulnerability_type``, ``line_count``, ``char_count``
        and ``file_size_kb``. The columns are present even when no contract was
        found, so later column lookups do not fail on an empty result.
    """
    columns = [
        'file_path',
        'file_name',
        'vulnerability_type',
        'line_count',
        'char_count',
        'file_size_kb',
    ]
    contract_info = []

    for vuln_type in vulnerability_types:
        vuln_path = os.path.join(buggy_contracts_path, vuln_type)
        sol_files = glob.glob(os.path.join(vuln_path, '*.sol'))

        for sol_file in sol_files:
            try:
                with open(sol_file, 'r', encoding='utf-8') as f:
                    content = f.read()

                # Count newline-terminated lines, plus a final line that has no
                # trailing newline. Splitting on '\n' would report one line too
                # many for files ending in a newline and 1 for an empty file.
                line_count = content.count('\n')
                if content and not content.endswith('\n'):
                    line_count += 1

                contract_info.append({
                    'file_path': sol_file,
                    'file_name': os.path.basename(sol_file),
                    'vulnerability_type': vuln_type,
                    'line_count': line_count,
                    'char_count': len(content),
                    'file_size_kb': os.path.getsize(sol_file) / 1024
                })
            except Exception as e:
                print(f"Error processing {sol_file}: {e}")

    return pd.DataFrame(contract_info, columns=columns)

# Build the two working DataFrames used by every later cell
print("Loading bug log data...")
bug_logs_df = load_bug_logs()

print("Loading contract information...")
contracts_df = get_contract_info()

# Report (rows, columns) of each loaded frame
print("\nData loaded successfully!")
for shape_label, loaded_frame in (("Bug logs", bug_logs_df), ("Contracts info", contracts_df)):
    print(f"{shape_label} shape: {loaded_frame.shape}")

## 3. Basic Dataset Information

In [None]:
# Shape, column names and a five-row preview for each dataset, separated by a
# 50-character rule.
section_rule = "\n" + "=" * 50
overview_targets = [
    ("=== BUG LOGS DATASET ===", bug_logs_df),
    ("=== CONTRACTS DATASET ===", contracts_df),
]

for section_index, (banner, overview_frame) in enumerate(overview_targets):
    if section_index > 0:
        print(section_rule)
    print(banner)
    print(f"Shape: {overview_frame.shape}")
    print(f"Columns: {list(overview_frame.columns)}")
    print("\nFirst 5 rows:")
    print(overview_frame.head())

# Column dtypes of both datasets
print(section_rule)
print("=== DATA TYPES ===")
for dtype_label, dtype_frame in (("Bug Logs", bug_logs_df), ("Contracts", contracts_df)):
    print(f"\n{dtype_label} Data Types:")
    print(dtype_frame.dtypes)

## 4. Dataset Shape and Structure

In [None]:
# Dataset dimensions and structure analysis
print("=== DATASET DIMENSIONS ===")
print(f"Bug Logs Dataset:")
print(f"  - Total vulnerabilities logged: {len(bug_logs_df)}")
print(f"  - Number of features: {len(bug_logs_df.columns)}")

print(f"\nContracts Dataset:")
print(f"  - Total contracts: {len(contracts_df)}")
print(f"  - Number of features: {len(contracts_df.columns)}")

# Count contracts and bug logs per vulnerability type
vuln_counts = bug_logs_df['vulnerability_type'].value_counts()
contract_counts = contracts_df['vulnerability_type'].value_counts()

print("\n=== VULNERABILITY TYPE DISTRIBUTION ===")
# Each value_counts() result is sorted by its own frequencies, so the two
# series generally list the categories in different orders (and may not even
# contain the same categories). Building the frame from the Series themselves
# lets pandas align both counts on the category label; a category missing from
# one side gets a count of 0 instead of shifting the other rows or raising.
comparison_df = (
    pd.DataFrame({
        'Bug_Logs_Count': vuln_counts,
        'Contract_Files_Count': contract_counts,
    })
    .fillna(0)
    .astype(int)
    .sort_values('Bug_Logs_Count', ascending=False, kind='stable')
    .rename_axis('Vulnerability_Type')
    .reset_index()
)
print(comparison_df)

print("\n=== SAMPLE DATA PREVIEW ===")
print("Last 3 rows of Bug Logs:")
print(bug_logs_df.tail(3))

print("\nLast 3 rows of Contracts:")
print(contracts_df.tail(3))

## 5. Missing Values Analysis

In [None]:
# Missing-value report: per-column null counts and their percentage of rows
print("=== MISSING VALUES ANALYSIS ===")

print("Bug Logs Dataset Missing Values:")
bug_missing = bug_logs_df.isnull().sum()
bug_missing_pct = (bug_missing / len(bug_logs_df)) * 100
bug_missing_summary = pd.DataFrame(
    {
        'Column': bug_missing.index,
        'Missing_Count': bug_missing.values,
        'Missing_Percentage': bug_missing_pct.values,
    }
)
print(bug_missing_summary)

print("\nContracts Dataset Missing Values:")
contract_missing = contracts_df.isnull().sum()
contract_missing_pct = (contract_missing / len(contracts_df)) * 100
contract_missing_summary = pd.DataFrame(
    {
        'Column': contract_missing.index,
        'Missing_Count': contract_missing.values,
        'Missing_Percentage': contract_missing_pct.values,
    }
)
print(contract_missing_summary)

# One panel per dataset: a null-mask heatmap when anything is missing,
# otherwise a centred "No Missing Values" note.
fig, axes = plt.subplots(1, 2, figsize=(15, 6))
heatmap_panels = [
    (axes[0], bug_logs_df, bug_missing, 'Missing Values in Bug Logs Dataset'),
    (axes[1], contracts_df, contract_missing, 'Missing Values in Contracts Dataset'),
]

for panel, panel_frame, null_counts, panel_title in heatmap_panels:
    if null_counts.sum() > 0:
        sns.heatmap(panel_frame.isnull(), ax=panel, cbar=True, yticklabels=False)
    else:
        panel.text(0.5, 0.5, 'No Missing Values', ha='center', va='center', transform=panel.transAxes)
    panel.set_title(panel_title)

plt.tight_layout()
plt.show()

## 6. Data Types and Memory Usage

In [None]:
# Data types and memory usage analysis
print("=== BUG LOGS DATASET - DATA TYPES AND MEMORY ===")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would add a stray "None" line.
bug_logs_df.info(memory_usage='deep')

print("\n=== CONTRACTS DATASET - DATA TYPES AND MEMORY ===")
contracts_df.info(memory_usage='deep')

# Analyze unique values in categorical columns
print("\n=== UNIQUE VALUES ANALYSIS ===")

print("Bug Logs - Categorical Columns:")
for col in bug_logs_df.select_dtypes(include=['object']).columns:
    unique_count = bug_logs_df[col].nunique()
    print(f"  {col}: {unique_count} unique values")
    if unique_count <= 20:  # Show unique values if not too many
        # nunique() ignores NaN, so NaN is dropped from the listing as well;
        # sorting by str() keeps a column that mixes value types from raising
        # TypeError during comparison.
        print(f"    Values: {sorted(bug_logs_df[col].dropna().unique(), key=str)}")

print("\nContracts - Categorical Columns:")
for col in contracts_df.select_dtypes(include=['object']).columns:
    unique_count = contracts_df[col].nunique()
    print(f"  {col}: {unique_count} unique values")
    if unique_count <= 20:  # Show unique values if not too many
        print(f"    Values: {sorted(contracts_df[col].dropna().unique(), key=str)}")

# Memory optimization suggestions: flag int64 columns whose observed value
# range fits a narrower integer type, and every float64 column.
print("\n=== MEMORY OPTIMIZATION OPPORTUNITIES ===")
for df_name, df in [("Bug Logs", bug_logs_df), ("Contracts", contracts_df)]:
    print(f"\n{df_name} Dataset:")
    for col in df.columns:
        if df[col].dtype == 'int64':
            col_min, col_max = df[col].min(), df[col].max()
            if col_min >= 0 and col_max <= 255:
                print(f"  {col}: Can be optimized to uint8")
            elif col_min >= -128 and col_max <= 127:
                print(f"  {col}: Can be optimized to int8")
            elif col_min >= 0 and col_max <= 65535:
                print(f"  {col}: Can be optimized to uint16")
        elif df[col].dtype == 'float64':
            print(f"  {col}: Can potentially be optimized to float32")

## 7. Statistical Summary

In [None]:
# describe() output plus per-column frequency tables for both datasets
print("=== BUG LOGS STATISTICAL SUMMARY ===")
print("\nNumerical Features:")
print(bug_logs_df.describe())

print("\nCategorical Features:")
bug_text_columns = bug_logs_df.select_dtypes(include=['object']).columns
for col in bug_text_columns:
    print(f"\n{col}:")
    print(bug_logs_df[col].value_counts())

print(f"\n{'=' * 60}")
print("=== CONTRACTS STATISTICAL SUMMARY ===")
print("\nNumerical Features:")
print(contracts_df.describe())

print("\nCategorical Features:")
contract_text_columns = contracts_df.select_dtypes(include=['object']).columns
for col in contract_text_columns:
    print(f"\n{col}:")
    # Only the ten most frequent values are shown to keep the output short
    print(contracts_df[col].value_counts().head(10))

# Range and mean of the key numeric columns
print("\n=== ADDITIONAL INSIGHTS ===")

print("\nBug Logs Insights:")
optional_bug_fields = (
    ("loc", "locations", "location"),
    ("length", "lengths", "length"),
)
for field, plural_label, singular_label in optional_bug_fields:
    if field not in bug_logs_df.columns:
        continue
    observed = bug_logs_df[field]
    print(f"  - Vulnerability {plural_label} range from {observed.min()} to {observed.max()}")
    print(f"  - Average vulnerability {singular_label}: {observed.mean():.2f}")

print("\nContract File Insights:")
line_counts = contracts_df['line_count']
sizes_kb = contracts_df['file_size_kb']
print(f"  - Contract sizes range from {line_counts.min()} to {line_counts.max()} lines")
print(f"  - Average contract size: {line_counts.mean():.2f} lines")
print(f"  - File sizes range from {sizes_kb.min():.2f} to {sizes_kb.max():.2f} KB")
print(f"  - Average file size: {sizes_kb.mean():.2f} KB")

## 8. Data Visualization

In [None]:
# 2x3 overview figure: category shares, file counts and size distributions
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
(ax_types, ax_files, ax_lines), (ax_loc, ax_len, ax_size) = axes

# Top-left: share of bug-log rows per vulnerability type
vuln_counts = bug_logs_df['vulnerability_type'].value_counts()
ax_types.pie(vuln_counts.values, labels=vuln_counts.index, autopct='%1.1f%%', startangle=90)
ax_types.set_title('Distribution of Vulnerability Types')

# Top-middle: number of contract files per vulnerability type
contract_counts = contracts_df['vulnerability_type'].value_counts()
bar_positions = range(len(contract_counts))
ax_files.bar(bar_positions, contract_counts.values)
ax_files.set_xticks(bar_positions)
ax_files.set_xticklabels(contract_counts.index, rotation=45, ha='right')
ax_files.set(title='Number of Contract Files by Vulnerability Type', ylabel='Number of Files')

# Top-right: histogram of contract line counts
ax_lines.hist(contracts_df['line_count'], bins=30, alpha=0.7, edgecolor='black')
ax_lines.set(
    title='Distribution of Contract File Line Counts',
    xlabel='Number of Lines',
    ylabel='Frequency',
)

# Bottom-left: histogram of bug locations, drawn only when the column exists
if 'loc' in bug_logs_df.columns:
    ax_loc.hist(bug_logs_df['loc'], bins=30, alpha=0.7, color='orange', edgecolor='black')
    ax_loc.set(title='Distribution of Bug Locations', xlabel='Line Number', ylabel='Frequency')

# Bottom-middle: histogram of bug lengths, drawn only when the column exists
if 'length' in bug_logs_df.columns:
    ax_len.hist(bug_logs_df['length'], bins=20, alpha=0.7, color='green', edgecolor='black')
    ax_len.set(title='Distribution of Bug Lengths', xlabel='Length (Lines)', ylabel='Frequency')

# Bottom-right: histogram of contract file sizes
ax_size.hist(contracts_df['file_size_kb'], bins=30, alpha=0.7, color='purple', edgecolor='black')
ax_size.set(
    title='Distribution of Contract File Sizes',
    xlabel='File Size (KB)',
    ylabel='Frequency',
)

plt.tight_layout()
plt.show()

In [None]:
# Additional detailed visualizations
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# 1. Box plot of contract line counts by vulnerability type
contracts_df.boxplot(column='line_count', by='vulnerability_type', ax=axes[0, 0])
axes[0, 0].set_title('Contract Line Count Distribution by Vulnerability Type')
axes[0, 0].set_xlabel('Vulnerability Type')
axes[0, 0].set_ylabel('Line Count')
plt.setp(axes[0, 0].xaxis.get_majorticklabels(), rotation=45, ha='right')

# 2. Box plot of file sizes by vulnerability type
contracts_df.boxplot(column='file_size_kb', by='vulnerability_type', ax=axes[0, 1])
axes[0, 1].set_title('File Size Distribution by Vulnerability Type')
axes[0, 1].set_xlabel('Vulnerability Type')
axes[0, 1].set_ylabel('File Size (KB)')
plt.setp(axes[0, 1].xaxis.get_majorticklabels(), rotation=45, ha='right')

# A pandas boxplot with by=... stamps a figure-level "Boxplot grouped by ..."
# suptitle on the shared figure; clear it so it does not sit on top of the
# subplot titles set above.
fig.suptitle('')

# 3. Vulnerability count per bug-log file.
# file_name holds only the CSV's base name, and each category directory has its
# own BugLog_*.csv files, so the same name may occur in several categories.
# Group on the category as well so such files are counted separately.
if 'file_name' in bug_logs_df.columns:
    file_keys = [key for key in ('vulnerability_type', 'file_name') if key in bug_logs_df.columns]
    vuln_per_file = bug_logs_df.groupby(file_keys).size()
    axes[1, 0].hist(vuln_per_file, bins=20, alpha=0.7, color='red', edgecolor='black')
    axes[1, 0].set_title('Number of Vulnerabilities per File')
    axes[1, 0].set_xlabel('Number of Vulnerabilities')
    axes[1, 0].set_ylabel('Number of Files')
else:
    # Nothing to draw: hide the panel instead of showing an empty frame
    axes[1, 0].set_visible(False)

# 4. Approach distribution (if available)
if 'approach' in bug_logs_df.columns:
    approach_counts = bug_logs_df['approach'].value_counts()
    axes[1, 1].pie(approach_counts.values, labels=approach_counts.index, autopct='%1.1f%%')
    axes[1, 1].set_title('Distribution of Bug Introduction Approaches')
else:
    axes[1, 1].set_visible(False)

plt.tight_layout()
plt.show()

## 9. Correlation Analysis

In [None]:
# Pairwise correlation of the numeric columns, one matrix per dataset
print("=== CORRELATION ANALYSIS ===")

# Bug logs: print the matrix and draw it as an annotated heatmap
print("Bug Logs Numerical Features Correlation:")
bug_logs_numeric = bug_logs_df.select_dtypes(include=[np.number])
if bug_logs_numeric.empty:
    print("No numerical features found in bug logs dataset")
else:
    bug_corr = bug_logs_numeric.corr()
    print(bug_corr)

    bug_fig, bug_ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(
        bug_corr,
        annot=True,
        cmap='coolwarm',
        center=0,
        square=True,
        cbar_kws={'shrink': 0.8},
        ax=bug_ax,
    )
    bug_ax.set_title('Correlation Matrix - Bug Logs Numerical Features')
    plt.tight_layout()
    plt.show()

print("\n" + "=" * 50)

# Contracts: same treatment
print("Contracts Numerical Features Correlation:")
contracts_numeric = contracts_df.select_dtypes(include=[np.number])
if contracts_numeric.empty:
    print("No numerical features found in contracts dataset")
else:
    contracts_corr = contracts_numeric.corr()
    print(contracts_corr)

    contracts_fig, contracts_ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(
        contracts_corr,
        annot=True,
        cmap='coolwarm',
        center=0,
        square=True,
        cbar_kws={'shrink': 0.8},
        ax=contracts_ax,
    )
    contracts_ax.set_title('Correlation Matrix - Contracts Numerical Features')
    plt.tight_layout()
    plt.show()

# Cross-dataset analysis: aggregate both datasets per vulnerability type, join
# the aggregates on the type, and correlate the resulting per-type statistics.
print("\n=== CROSS-DATASET ANALYSIS ===")

if 'vulnerability_type' in bug_logs_df.columns and 'vulnerability_type' in contracts_df.columns:
    # Only request aggregates for columns that actually exist. Passing a key
    # for a missing column to agg() raises KeyError regardless of the
    # aggregation chosen, so the spec is built conditionally.
    bug_agg_spec = {}
    if 'loc' in bug_logs_df.columns:
        bug_agg_spec['loc'] = ['count', 'mean', 'std']
    if 'length' in bug_logs_df.columns:
        bug_agg_spec['length'] = ['mean', 'std']

    bug_groups = bug_logs_df.groupby('vulnerability_type')
    if bug_agg_spec:
        bug_stats = bug_groups.agg(bug_agg_spec)
        # Flatten the (column, statistic) pairs into names such as 'loc_mean'
        bug_stats.columns = ['_'.join(col) for col in bug_stats.columns]
        bug_stats = bug_stats.reset_index()
    else:
        # Neither column is available: fall back to the row count per type
        bug_stats = bug_groups.size().rename('bug_count').reset_index()

    contract_stats = contracts_df.groupby('vulnerability_type').agg({
        'line_count': ['mean', 'std'],
        'char_count': ['mean', 'std'],
        'file_size_kb': ['mean', 'std']
    })
    contract_stats.columns = ['_'.join(col) for col in contract_stats.columns]
    contract_stats = contract_stats.reset_index()

    # Merge for correlation analysis; only types present in both sets survive
    merged_stats = pd.merge(bug_stats, contract_stats, on='vulnerability_type', how='inner')
    merged_numeric = merged_stats.select_dtypes(include=[np.number])

    if not merged_numeric.empty and len(merged_numeric.columns) > 1:
        merged_corr = merged_numeric.corr()
        print("Cross-dataset correlation:")
        print(merged_corr)

        plt.figure(figsize=(12, 10))
        sns.heatmap(merged_corr, annot=True, cmap='coolwarm', center=0,
                    square=True, cbar_kws={'shrink': 0.8})
        plt.title('Cross-Dataset Correlation Matrix')
        plt.tight_layout()
        plt.show()
    else:
        print("Insufficient numerical data for cross-dataset correlation")
else:
    print("Insufficient numerical data for cross-dataset correlation")

## 10. Feature Distribution Analysis

In [None]:
# Section banner for the feature distribution / outlier detection cell
print("=== FEATURE DISTRIBUTION ANALYSIS ===")

# Analyze outliers using IQR method
def detect_outliers(df, column):
    """Flag rows whose ``column`` value lies outside the 1.5 * IQR fences.

    Args:
        df: DataFrame to inspect.
        column: Name of the column to test.

    Returns:
        A tuple ``(outliers, lower_bound, upper_bound)``: the rows of ``df``
        strictly below the lower fence or strictly above the upper fence,
        followed by the two fence values.
    """
    values = df[column]
    first_quartile, third_quartile = values.quantile([0.25, 0.75])
    spread = third_quartile - first_quartile

    lower_bound = first_quartile - 1.5 * spread
    upper_bound = third_quartile + 1.5 * spread

    # Strict comparisons: a value sitting exactly on a fence is not an outlier
    outside_fences = values.lt(lower_bound) | values.gt(upper_bound)
    return df[outside_fences], lower_bound, upper_bound

# Report, for every numeric column of both datasets, how many rows fall
# outside the IQR fences and what those fences are.
print("OUTLIER ANALYSIS:")

datasets_to_scan = {"Bug Logs": bug_logs_df, "Contracts": contracts_df}
for dataset_name, dataset in datasets_to_scan.items():
    print(f"\n{dataset_name} Dataset:")
    row_total = len(dataset)

    for col in dataset.select_dtypes(include=[np.number]).columns:
        flagged_rows, lower, upper = detect_outliers(dataset, col)
        flagged_total = len(flagged_rows)
        outlier_pct = (flagged_total / row_total) * 100
        print(f"  {col}:")
        print(f"    - Outliers: {flagged_total} ({outlier_pct:.2f}%)")
        print(f"    - Valid range: [{lower:.2f}, {upper:.2f}]")

# Histograms of the first two numeric columns of each dataset, each annotated
# with dashed mean and median lines.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

numeric_cols_contracts = contracts_df.select_dtypes(include=[np.number]).columns
numeric_cols_bugs = bug_logs_df.select_dtypes(include=[np.number]).columns

# (axis, source frame, its numeric columns, column position, bar colour, median colour)
# A bar colour of None leaves the histogram on matplotlib's default colour.
distribution_panels = [
    (axes[0, 0], contracts_df, numeric_cols_contracts, 0, None, 'green'),
    (axes[0, 1], contracts_df, numeric_cols_contracts, 1, 'orange', 'green'),
    (axes[1, 0], bug_logs_df, numeric_cols_bugs, 0, 'green', 'purple'),
    (axes[1, 1], bug_logs_df, numeric_cols_bugs, 1, 'purple', 'green'),
]

for panel, panel_frame, numeric_names, position, bar_color, median_color in distribution_panels:
    # Skip the panel when the dataset has too few numeric columns
    if len(numeric_names) <= position:
        continue

    feature = numeric_names[position]
    samples = panel_frame[feature]
    feature_mean = samples.mean()
    feature_median = samples.median()

    hist_options = {'bins': 30, 'alpha': 0.7, 'edgecolor': 'black'}
    if bar_color is not None:
        hist_options['color'] = bar_color

    panel.hist(samples, **hist_options)
    panel.axvline(feature_mean, color='red', linestyle='--', label=f'Mean: {feature_mean:.2f}')
    panel.axvline(feature_median, color=median_color, linestyle='--', label=f'Median: {feature_median:.2f}')
    panel.set_title(f'Distribution of {feature}')
    panel.legend()

plt.tight_layout()
plt.show()

# Class balance of every object-typed column in both datasets
print("\n=== CATEGORICAL VARIABLE BALANCE ANALYSIS ===")

for dataset_name, dataset in (("Bug Logs", bug_logs_df), ("Contracts", contracts_df)):
    print(f"\n{dataset_name} Dataset:")
    row_total = len(dataset)

    for col in dataset.select_dtypes(include=['object']).columns:
        frequencies = dataset[col].value_counts()
        print(f"\n  {col} Balance:")
        for value, count in frequencies.items():
            percentage = (count / row_total) * 100
            print(f"    {value}: {count} ({percentage:.2f}%)")

        # A ratio needs at least two distinct values
        if len(frequencies) <= 1:
            continue

        # value_counts() is sorted by frequency, so the first entry is the most
        # common value and the last entry the least common one
        balance_ratio = frequencies.iloc[0] / frequencies.iloc[-1]
        print(f"    Balance ratio: {balance_ratio:.2f}:1")

# Closing checklist of the topics covered above
print("\n=== SUMMARY FOR ML PREPARATION ===")
print("Key findings for machine learning preparation:")
ml_checklist = [
    "Dataset sizes and balance",
    "Outlier presence and treatment needs",
    "Feature distributions (normal, skewed, etc.)",
    "Missing value patterns",
    "Categorical variable encoding requirements",
]
for item_number, checklist_item in enumerate(ml_checklist, start=1):
    print(f"{item_number}. {checklist_item}")

## Conclusion

This comprehensive analysis of the smart contract vulnerability detection dataset reveals:

### Key Findings:
1. **Dataset Structure**: The dataset contains multiple vulnerability categories with corresponding Solidity contracts and bug logs
2. **Vulnerability Types**: 7 main vulnerability categories are present in the dataset
3. **Data Quality**: Missing values, outliers, and data consistency are analyzed for both the bug logs and the contract files
4. **Feature Characteristics**: The distributions of the numerical and categorical features are summarized and visualized

### Next Steps for ML Pipeline:
1. **Data Preprocessing**: Handle outliers, normalize features, encode categorical variables
2. **Feature Engineering**: Extract additional features from smart contract code
3. **Model Selection**: Choose appropriate algorithms for vulnerability classification
4. **Evaluation**: Implement proper validation strategies for imbalanced datasets

This analysis provides a solid foundation for developing machine learning models for smart contract vulnerability detection.