# Get data

In [None]:
import requests
import sys

# API configuration
# NOTE(review): this value looks like a placeholder (it is just the variable's
# own name) — replace it with a real Etherscan API key before running. It is
# sent as the `apikey` query parameter of every Etherscan request.
ETHERSCAN_API_KEY = 'ETHERSCAN_API_KEY'
# Root URL of the CoinGecko v3 REST API; endpoint paths are appended to it.
COINGECKO_BASE = 'https://api.coingecko.com/api/v3'

def fetch_etherscan_abi(address: str):
    """Fetch a contract's ABI from the Etherscan ``getabi`` endpoint.

    Args:
        address: Contract address to look up.

    Returns:
        The decoded JSON body of the response, exactly as produced by
        ``requests.Response.json()``.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        ValueError: If the response body is not valid JSON.
    """
    # Let requests build the query string: ``address`` gets URL-encoded, so a
    # malformed value cannot smuggle extra ``&key=value`` pairs into the call.
    params = {
        'module': 'contract',
        'action': 'getabi',
        'address': address,
        'apikey': ETHERSCAN_API_KEY,
    }
    response = requests.get("https://api.etherscan.io/api", params=params, timeout=10)
    return response.json()

def fetch_etherscan_source(address: str):
    """Fetch a contract's source metadata from the Etherscan ``getsourcecode`` endpoint.

    Args:
        address: Contract address to look up.

    Returns:
        The decoded JSON body of the response, exactly as produced by
        ``requests.Response.json()``.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        ValueError: If the response body is not valid JSON.
    """
    # Let requests build the query string: ``address`` gets URL-encoded, so a
    # malformed value cannot smuggle extra ``&key=value`` pairs into the call.
    params = {
        'module': 'contract',
        'action': 'getsourcecode',
        'address': address,
        'apikey': ETHERSCAN_API_KEY,
    }
    response = requests.get("https://api.etherscan.io/api", params=params, timeout=10)
    return response.json()

def check_etherscan_contract(address: str) -> dict:
    """Summarise Etherscan verification status and ABI keyword flags.

    Queries the ABI and source-code endpoints for ``address`` and returns a
    dict with, in this order:

    * ``is_verified`` - True when the ABI call reports status ``'1'`` and its
      result is not the "not verified" message.
    * ``has_<keyword>`` for mint, blacklist, setfee, withdraw, unlock, pause,
      changefee and owner - True when the keyword occurs anywhere in the
      lower-cased ABI text (a plain substring test, so ``owner`` also matches
      any longer identifier that contains it).
    * ``OptimizationUsed`` - copied from the first source-code entry, or
      ``''`` when unavailable.
    """
    keywords = ('mint', 'blacklist', 'setfee', 'withdraw',
                'unlock', 'pause', 'changefee', 'owner')

    abi_response = fetch_etherscan_abi(address)
    source_response = fetch_etherscan_source(address)

    # Pessimistic defaults; key order is part of the returned contract.
    summary = {'is_verified': False}
    summary.update({f'has_{keyword}': False for keyword in keywords})
    summary['OptimizationUsed'] = ''

    abi_available = (
        abi_response.get('status') == '1'
        and abi_response.get('result') != 'Contract source code not verified'
    )
    if abi_available:
        summary['is_verified'] = True
        abi_text = abi_response['result'].lower()
        summary.update({f'has_{keyword}': keyword in abi_text for keyword in keywords})

    source_entries = source_response.get('result')
    if source_response.get('status') == '1' and source_entries:
        first_entry = source_entries[0]
        summary['OptimizationUsed'] = first_entry.get('OptimizationUsed', '')

    return summary

def get_dex_cex_data(token_address: str) -> dict:
    """Collect market features for an Ethereum token from CoinGecko.

    Every lookup is best-effort: if a request fails or the payload is not in
    the expected shape, the affected field keeps its default value.

    Args:
        token_address: Contract address of the token on Ethereum.

    Returns:
        A dict with the keys ``cex_listings`` (bool), ``trading_volume_24h``,
        ``price_change_24h``, ``price_change_7d`` (floats, 0.0 when missing)
        and ``large_dumps_detected`` (bool, True when two consecutive samples
        of the 7-day price chart differ by more than -25 %).
    """
    # NOTE(review): CoinGecko may list Coinbase under a different market
    # identifier (historically 'gdax') — TODO confirm 'coinbase' ever matches.
    major_exchanges = {'binance', 'kraken', 'coinbase', 'huobi', 'okex'}

    def _as_float(value) -> float:
        # Missing figures arrive as JSON null (None); treat those and any
        # other non-numeric value as 0.0 instead of raising.
        try:
            return float(value) or 0.0
        except (TypeError, ValueError):
            return 0.0

    # ``except Exception`` (not a bare ``except``) keeps the lookups
    # best-effort without swallowing KeyboardInterrupt / SystemExit.
    try:
        token_data = requests.get(
            f"{COINGECKO_BASE}/coins/ethereum/contract/{token_address}",
            timeout=10
        ).json()
    except Exception:
        token_data = {}
    if not isinstance(token_data, dict):
        # Unexpected JSON shape (e.g. a list or string): treat as "no data".
        token_data = {}

    # Default values
    result = {
        'cex_listings': False,
        'trading_volume_24h': 0.0,
        'price_change_24h': 0.0,
        'price_change_7d': 0.0,
        'large_dumps_detected': False
    }

    cid = token_data.get('id')
    if not cid:
        # Unknown token or failed lookup: nothing more can be queried.
        return result

    # CEX listing: is the token traded on any of the major exchanges?
    try:
        tickers = requests.get(
            f"{COINGECKO_BASE}/coins/{cid}/tickers", timeout=10
        ).json().get('tickers', [])
        result['cex_listings'] = any(
            (ticker.get('market') or {}).get('identifier') in major_exchanges
            for ticker in tickers
        )
    except Exception:
        pass

    # Market data: any of these may be null or absent in the payload.
    md = token_data.get('market_data')
    if not isinstance(md, dict):
        md = {}
    volume_by_currency = md.get('total_volume')
    if not isinstance(volume_by_currency, dict):
        volume_by_currency = {}
    result['trading_volume_24h'] = _as_float(volume_by_currency.get('usd'))
    result['price_change_24h'] = _as_float(md.get('price_change_percentage_24h'))
    result['price_change_7d'] = _as_float(md.get('price_change_percentage_7d'))

    # Large dumps detection: a drop of more than 25 % between two consecutive
    # samples of the 7-day price chart.
    try:
        chart = requests.get(
            f"{COINGECKO_BASE}/coins/{cid}/market_chart?vs_currency=usd&days=7",
            timeout=10
        ).json()
        prices = chart.get('prices', [])
        for previous_point, current_point in zip(prices, prices[1:]):
            prev, curr = previous_point[1], current_point[1]
            # A zero/None previous price is skipped to avoid dividing by zero.
            if prev and ((curr - prev) / prev * 100) < -25:
                result['large_dumps_detected'] = True
                break
    except Exception:
        pass

    return result

def get_token_info(contract_address: str):
    """Build the combined feature dict for a token contract address.

    Runs the Etherscan contract check and the CoinGecko market lookup, then
    returns their fields merged into one dict in a fixed key order: contract
    flags first, market figures second.
    """
    contract_keys = (
        'is_verified', 'has_mint', 'has_blacklist', 'has_setfee',
        'has_withdraw', 'has_unlock', 'has_pause', 'has_changefee',
        'has_owner', 'OptimizationUsed',
    )
    market_keys = (
        'cex_listings', 'trading_volume_24h', 'price_change_24h',
        'price_change_7d', 'large_dumps_detected',
    )

    contract_info = check_etherscan_contract(contract_address)
    market_data = get_dex_cex_data(contract_address)

    # Copy key by key so the output holds exactly these fields, in this order.
    token_features = {key: contract_info[key] for key in contract_keys}
    token_features.update({key: market_data[key] for key in market_keys})
    return token_features

if __name__ == '__main__':
    # Example run: build the combined feature dict for one hard-coded
    # contract address and print it. This calls get_token_info, which
    # performs live HTTP requests to Etherscan and CoinGecko.
    contract_address = '0x0a07525aa264a3e14cdbdd839b1eda02a34e2778'
    token_info = get_token_info(contract_address)
    print(token_info)

{'is_verified': True, 'has_mint': False, 'has_blacklist': False, 'has_setfee': False, 'has_withdraw': False, 'has_unlock': False, 'has_pause': False, 'has_changefee': False, 'has_owner': True, 'OptimizationUsed': '1', 'cex_listings': False, 'trading_volume_24h': 0.0, 'price_change_24h': 0.0, 'price_change_7d': 0.0, 'large_dumps_detected': False}


## XGBOOST

In [1]:
import pandas as pd
import numpy as np
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder

In [6]:
# Load the training table (absolute path; presumably a Google Colab working
# directory — adjust when running elsewhere).
# NOTE(review): the preview of this frame shows an 'Unnamed: 0' column, which
# usually means the CSV was written together with its index — confirm whether
# that column should be excluded before training.
df = pd.read_csv('/content/final_train_file.csv')

In [8]:
# Preview the first five rows to sanity-check the loaded columns.
df.head(5)

Unnamed: 0,label,is_verified,has_mint,has_blacklist,has_setfee,has_withdraw,has_unlock,has_pause,has_changefee,has_owner,OptimizationUsed,cex_listings,trading_volume_24h,price_change_24h,price_change_7d,large_dumps_detected
0,0,1,0,0,0,0,0,0,0,1,1.0,0,26415.0,-12.65765,1.41999,0
1,0,1,0,0,0,0,0,0,0,1,1.0,0,3004.29,-1.47952,-4.13863,0
2,0,1,1,0,0,0,0,0,0,1,1.0,0,3655102.0,4.7484,-4.7371,0
3,0,1,1,0,0,0,0,0,0,1,0.0,0,7.38,0.0,1.38325,0
4,0,1,1,0,0,0,0,1,0,1,0.0,0,124.43,0.0,-6.32648,0


In [24]:
# NOTE(review): not used anywhere in the visible code; kept so that any other
# cell referencing `label_encoder` keeps working.
label_encoder = LabelEncoder()

# Split into features and target.
# 'Unnamed: 0' is the row-id column that appears in the loaded frame. It is
# not produced by the feature extraction (get_token_info), and it can leak the
# label if the rows are ordered by class, so it must not be used as a feature.
# errors='ignore' keeps this working when the column is absent.
X = df.drop(columns=['label']).drop(columns=['Unnamed: 0'], errors='ignore')
y = df['label']

# Convert boolean columns to integers
bool_cols = X.select_dtypes(include=['bool']).columns
X[bool_cols] = X[bool_cols].astype(int)

# Split into training and test sets (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the XGBoost model
model = XGBClassifier(
    objective='binary:logistic',
    n_estimators=100,
    max_depth=3,
    learning_rate=0.1,
    random_state=42
)

model.fit(X_train, y_train)

# Predict on the held-out set and evaluate the model
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.8679245283018868

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.87      0.91      1638
           1       0.71      0.86      0.77       588

    accuracy                           0.87      2226
   macro avg       0.83      0.86      0.84      2226
weighted avg       0.88      0.87      0.87      2226



In [None]:
import pickle

# Serialise the fitted classifier to 'xgb_model.pkl' in the current working
# directory so it can be reloaded without retraining.
with open('xgb_model.pkl', 'wb') as model_file:
    pickle.Pickler(model_file).dump(model)