In [None]:
import os
import random
import blocksci
import copy
import numpy as np
import pickle
import tqdm

# Open the blockchain through BlockSci using this config path; `chain` is
# used further down to look transactions up by hash.
chain = blocksci.Blockchain("/home/blocksci_data/blocksci.conf")

# Seed Python's `random` module with a fixed value.
random.seed(42)    

In [None]:
# Multiplier 1e-8; by its name, converts a satoshi amount to bitcoin.
# Not referenced anywhere else in the visible code.
satoshi2bitcoin = 0.00000001
# Registries of feature functions, filled by `sign_up_decorator` in definition
# order and iterated by `trace`:
functions_utxo = []  # called with a single output
functions_tx = []    # called with a transaction
functions_addr = []  # called with (address, height_limit)
functions_path = []  # called with a list of outputs (a path prefix)


def sign_up_decorator(_functions_list: list):
    """Build a decorator that registers a function in ``_functions_list``.

    The undecorated function is appended to the list at decoration time, and
    a pass-through wrapper is what ends up bound to the decorated name.

    Args:
        _functions_list: Registry list that collects the decorated functions.

    Returns:
        A decorator that takes the function to register and returns a
        wrapper forwarding all positional and keyword arguments to it.
    """
    import functools  # local import keeps this block self-contained

    def sign_up(module_func):
        _functions_list.append(module_func)

        # wraps() keeps __name__/__doc__ of the registered function on the
        # wrapper; **kwargs lets callers use keyword arguments as well.
        @functools.wraps(module_func)
        def run_module(*args, **kwargs):
            return module_func(*args, **kwargs)

        return run_module

    return sign_up


@sign_up_decorator(functions_utxo)
def basic_module_111(output):
    """Value, value share and value rank of ``output`` within its transaction.

    Returns:
        ``(value, value / total_output_value, rank)`` where ``rank`` is the
        1-based position among the transaction's outputs sorted by descending
        value, or ``(None, None, None)`` when no output of the transaction
        has the same ``index`` as ``output``.
    """
    ranked = sorted(output.tx.outputs, key=lambda candidate: candidate.value, reverse=True)
    for position, candidate in enumerate(ranked, start=1):
        if candidate.index != output.index:
            continue
        total_value = sum(item.value for item in ranked)
        return output.value, output.value / total_value, position
    return None, None, None

'''--------------------------------------------------------amount--------------------------------------------------------'''
@sign_up_decorator(functions_tx)
def basic_module_211(tx):
    """Return ``(input_count, output_count)`` of the transaction."""
    n_inputs = tx.input_count
    n_outputs = tx.output_count
    return n_inputs, n_outputs

@sign_up_decorator(functions_tx)
def basic_module_212(tx):
    """Return the transaction's input count divided by its output count."""
    n_inputs = tx.input_count
    n_outputs = tx.output_count
    return n_inputs / n_outputs

@sign_up_decorator(functions_tx)
def basic_module_213(tx):
    """Return ``(input_value, output_value)`` of the transaction."""
    total_in = tx.input_value
    total_out = tx.output_value
    return total_in, total_out

@sign_up_decorator(functions_tx)
def basic_module_214(tx):
    """Mean, max and min of the input values, then of the output values.

    Returns:
        A 6-tuple ``(in_mean, in_max, in_min, out_mean, out_max, out_min)``.
        The three input statistics are 0 when the transaction has no inputs.
    """
    in_values = [txin.value for txin in tx.inputs]
    out_values = [txout.value for txout in tx.outputs]
    if in_values:
        in_stats = (sum(in_values) / len(in_values), max(in_values), min(in_values))
    else:
        in_stats = (0, 0, 0)
    out_stats = (sum(out_values) / len(out_values), max(out_values), min(out_values))
    return in_stats + out_stats
    
@sign_up_decorator(functions_tx)
def basic_module_215(tx):
    """Largest and smallest value as a fraction of the total, inputs then outputs.

    Returns:
        ``(in_max_share, in_min_share, out_max_share, out_min_share)``.
        Both input shares are 0 when the transaction has no inputs.
    """
    # NOTE(review): a non-empty side whose values sum to 0 raises
    # ZeroDivisionError here - confirm whether that can occur in the data.
    in_values = [txin.value for txin in tx.inputs]
    out_values = [txout.value for txout in tx.outputs]
    if in_values:
        in_total = sum(in_values)
        in_shares = (max(in_values) / in_total, min(in_values) / in_total)
    else:
        in_shares = (0, 0)
    out_shares = (max(out_values) / sum(out_values), min(out_values) / sum(out_values))
    return in_shares + out_shares
    
@sign_up_decorator(functions_tx)
def basic_module_217(tx):
    """Return the transaction's ``fee``."""
    fee = tx.fee
    return fee

'''--------------------------------------------------------time--------------------------------------------------------'''
@sign_up_decorator(functions_tx)
def basic_module_221(tx):
    """Return the block height of the transaction."""
    height = tx.block_height
    return height

@sign_up_decorator(functions_tx)
def basic_module_222(tx):
    """Mean, max and min of the ``age`` attribute over the transaction's inputs.

    Returns ``(0, 0, 0)`` when the transaction has no inputs.
    """
    ages = [txin.age for txin in tx.inputs]
    if not ages:
        return 0, 0, 0
    return sum(ages) / len(ages), max(ages), min(ages)

# Address holding the UTXO

'''--------------------------------------------------------amount--------------------------------------------------------'''
@sign_up_decorator(functions_addr)
def basic_module_311(address, height_limit):
    """Count the address's transactions at block height <= ``height_limit``."""
    return sum(1 for tx in address.txes if tx.block_height <= height_limit)

@sign_up_decorator(functions_addr)
def basic_module_312(address, height_limit):
    """Count the address's inputs and outputs in blocks up to ``height_limit``.

    Returns:
        ``(input_count, output_count)``, each restricted to entries whose
        block height is <= ``height_limit``.
    """
    n_ins = sum(1 for txin in address.inputs if txin.block.height <= height_limit)
    n_outs = sum(1 for txout in address.outputs if txout.block.height <= height_limit)
    return n_ins, n_outs

@sign_up_decorator(functions_addr)
def basic_module_313(address, height_limit):
    """Mean, max and min of input counts, then of output counts, per transaction.

    Only the address's transactions at block height <= ``height_limit`` are
    considered.
    """
    in_counts, out_counts = [], []
    for tx in address.txes:
        if tx.block_height > height_limit:
            continue
        in_counts.append(tx.input_count)
        out_counts.append(tx.output_count)
    in_stats = (sum(in_counts) / len(in_counts), max(in_counts), min(in_counts))
    out_stats = (sum(out_counts) / len(out_counts), max(out_counts), min(out_counts))
    return in_stats + out_stats

@sign_up_decorator(functions_addr)
def basic_module_314(address, height_limit):
    """Ratio of the two counts returned by ``basic_module_312`` (inputs / outputs)."""
    counts = basic_module_312(address, height_limit)
    n_ins, n_outs = counts
    return n_ins / n_outs

@sign_up_decorator(functions_addr)
def basic_module_315(address, height_limit):
    """Mean, max and min of the per-transaction input/output count ratio.

    Only the address's transactions at block height <= ``height_limit`` are
    considered.
    """
    ratios = []
    for tx in address.txes:
        if tx.block_height <= height_limit:
            ratios.append(tx.input_count / tx.output_count)
    return sum(ratios) / len(ratios), max(ratios), min(ratios)

@sign_up_decorator(functions_addr)
def basic_module_316(address, height_limit):
    """Sum of the address's input values and of its output values.

    Each sum covers only entries whose block height is <= ``height_limit``.
    """
    in_total = 0
    for txin in address.inputs:
        if txin.block.height <= height_limit:
            in_total += txin.value
    out_total = 0
    for txout in address.outputs:
        if txout.block.height <= height_limit:
            out_total += txout.value
    return in_total, out_total

@sign_up_decorator(functions_addr)
def basic_module_317(address, height_limit):
    """Mean, max and min of ``input_value``, then of ``output_value``, per transaction.

    Only the address's transactions at block height <= ``height_limit`` are
    considered.
    """
    in_totals, out_totals = [], []
    for tx in address.txes:
        if tx.block_height > height_limit:
            continue
        in_totals.append(tx.input_value)
        out_totals.append(tx.output_value)
    in_stats = (sum(in_totals) / len(in_totals), max(in_totals), min(in_totals))
    out_stats = (sum(out_totals) / len(out_totals), max(out_totals), min(out_totals))
    return in_stats + out_stats

@sign_up_decorator(functions_addr)
def basic_module_318(address, height_limit):
    """Mean, max and min fee over the address's transactions up to ``height_limit``."""
    fees = []
    for tx in address.txes:
        if tx.block_height <= height_limit:
            fees.append(tx.fee)
    return sum(fees) / len(fees), max(fees), min(fees)

@sign_up_decorator(functions_addr)
def basic_module_319(address, height_limit):
    """Return ``address.balance(height_limit)``."""
    balance_at_limit = address.balance(height_limit)
    return balance_at_limit

'''--------------------------------------------------------time--------------------------------------------------------'''
@sign_up_decorator(functions_addr)
def basic_module_321(address, height_limit):
    """Number of distinct block heights (<= ``height_limit``) with a transaction of the address."""
    active_heights = {tx.block_height for tx in address.txes if tx.block_height <= height_limit}
    return len(active_heights)

@sign_up_decorator(functions_addr)
def basic_module_322(address, height_limit):
    """Mean, max and min number of the address's transactions per active block height.

    Only transactions at block height <= ``height_limit`` are counted, and
    only heights with at least one such transaction contribute.

    Returns:
        ``(mean, max, min)`` of the per-height transaction counts, or
        ``(0, 0, 0)`` when no transaction qualifies.
    """
    from collections import Counter  # local import keeps the block self-contained

    # Counter starts every height at 1 on its first transaction; the previous
    # hand-rolled dict started at 0 and under-counted every height by one.
    per_height = Counter(
        tx.block_height for tx in address.txes if tx.block_height <= height_limit
    )
    counts = list(per_height.values())
    if not counts:
        return 0, 0, 0
    return sum(counts) / len(counts), max(counts), min(counts)
    
@sign_up_decorator(functions_addr)
def basic_module_323(address, height_limit):
    """Mean, max and min gap between consecutive active block heights.

    Active heights are the distinct block heights (<= ``height_limit``) of the
    address's transactions. Returns ``(0, 0, 0)`` when there are fewer than
    two of them.
    """
    heights = sorted({tx.block_height for tx in address.txes if tx.block_height <= height_limit})
    if len(heights) < 2:
        return 0, 0, 0
    gaps = [abs(later - earlier) for earlier, later in zip(heights, heights[1:])]
    return sum(gaps) / len(gaps), max(gaps), min(gaps)
    
@sign_up_decorator(functions_addr)
def basic_module_324(address, height_limit):
    """Difference between the highest and lowest active block height up to ``height_limit``."""
    heights = {tx.block_height for tx in address.txes if tx.block_height <= height_limit}
    highest = max(heights)
    lowest = min(heights)
    return highest - lowest

@sign_up_decorator(functions_addr)
def basic_module_325(address, height_limit):
    """Mean, max and min ``age`` of the address's inputs in blocks up to ``height_limit``.

    Returns ``(0, 0, 0)`` when no input qualifies.
    """
    ages = [txin.age for txin in address.inputs if txin.block.height <= height_limit]
    if not ages:
        return 0, 0, 0
    return sum(ages) / len(ages), max(ages), min(ages)


@sign_up_decorator(functions_addr)
def basic_module_326(address, height_limit):
    """Mean, max and min ``age`` of the inputs that spend the address's outputs.

    Considers outputs whose block height is <= ``height_limit`` and whose
    ``is_spent`` flag is set; the age is read from ``spending_input``.

    Returns:
        ``(mean, max, min)``, or ``(0, 0, 0)`` when no output qualifies
        (same convention as ``basic_module_325``).
    """
    ages = [
        txout.spending_input.age
        for txout in address.outputs
        if txout.block.height <= height_limit and txout.is_spent
    ]
    # Without this guard an address with no qualifying spent output raised
    # ZeroDivisionError.
    if not ages:
        return 0, 0, 0
    return sum(ages) / len(ages), max(ages), min(ages)

'''--------------------------------------------------------amount+time--------------------------------------------------------'''
@sign_up_decorator(functions_addr)
def basic_module_331(address, height_limit):
    """Transaction count per active block height and per block of active span.

    Combines ``basic_module_311`` (transaction count), ``basic_module_321``
    (distinct active heights) and ``basic_module_324`` (height span, floored
    at 1 to avoid dividing by zero).
    """
    tx_total = basic_module_311(address, height_limit)
    active_heights = basic_module_321(address, height_limit)
    span = basic_module_324(address, height_limit)
    if span < 1:
        span = 1
    return tx_total / active_heights, tx_total / span

@sign_up_decorator(functions_addr)
def basic_module_332(address, height_limit):
    """Input and output counts per active block height and per block of active span.

    Returns:
        ``(ins / active, outs / active, ins / span, outs / span)`` using the
        counts from ``basic_module_312``, the active-height count from
        ``basic_module_321`` and the span from ``basic_module_324`` (floored
        at 1).
    """
    n_ins, n_outs = basic_module_312(address, height_limit)
    active_heights = basic_module_321(address, height_limit)
    span = basic_module_324(address, height_limit)
    if span < 1:
        span = 1
    per_active = (n_ins / active_heights, n_outs / active_heights)
    per_span = (n_ins / span, n_outs / span)
    return per_active + per_span

@sign_up_decorator(functions_addr)
def basic_module_333(address, height_limit):
    """Input and output value per active block height and per block of active span.

    Returns:
        ``(in_value / active, out_value / active, in_value / span,
        out_value / span)`` using the sums from ``basic_module_316``, the
        active-height count from ``basic_module_321`` and the span from
        ``basic_module_324`` (floored at 1).
    """
    in_total, out_total = basic_module_316(address, height_limit)
    active_heights = basic_module_321(address, height_limit)
    span = basic_module_324(address, height_limit)
    if span < 1:
        span = 1
    per_active = (in_total / active_heights, out_total / active_heights)
    per_span = (in_total / span, out_total / span)
    return per_active + per_span




# Path up to a given address
'''--------------------------------------------------------amount--------------------------------------------------------'''
@sign_up_decorator(functions_path)
def basic_module_411(outputs):
    """Return the number of outputs on the path."""
    path_length = len(outputs)
    return path_length

@sign_up_decorator(functions_path)
def basic_module_412(outputs):
    """Sum of input count plus output count over the transactions of the path's outputs.

    Returns 0 for an empty path.
    """
    # input_count / output_count are read as plain attributes everywhere else
    # in this file (and used in arithmetic), so they must not be called here.
    return sum(out.tx.input_count + out.tx.output_count for out in outputs)
        
@sign_up_decorator(functions_path)
def basic_module_413(outputs):
    """Sum of ``output_value`` over the transactions of the path's outputs."""
    total = 0
    for out in outputs:
        total += out.tx.output_value
    return total

'''--------------------------------------------------------time--------------------------------------------------------'''
@sign_up_decorator(functions_path)
def basic_module_421(outputs):
    """Block height of the last output's transaction minus that of the first."""
    first_tx = outputs[0].tx
    last_tx = outputs[-1].tx
    return last_tx.block_height - first_tx.block_height

@sign_up_decorator(functions_path)
def basic_module_422(outputs):
    """Mean, max and min block-height difference between consecutive path transactions.

    Returns ``(0, 0, 0)`` when the path has fewer than two outputs.
    """
    # NOTE(review): each difference is current minus next, the opposite sign
    # to basic_module_421 (last minus first) - confirm this is intended.
    heights = [out.tx.block_height for out in outputs]
    if len(heights) < 2:
        return 0, 0, 0
    deltas = [current - following for current, following in zip(heights, heights[1:])]
    return sum(deltas) / len(deltas), max(deltas), min(deltas)
    
'''--------------------------------------------------------amount+time--------------------------------------------------------'''
@sign_up_decorator(functions_path)
def basic_module_431(outputs):
    """``basic_module_412`` total per path output and per block of path span.

    The span comes from ``basic_module_421`` and is floored at 1.
    """
    count_total = basic_module_412(outputs)
    per_output = count_total / len(outputs)
    per_block = basic_module_412(outputs) / max(basic_module_421(outputs), 1)
    return per_output, per_block

@sign_up_decorator(functions_path)
def basic_module_432(outputs):
    """``basic_module_413`` total per path output and per block of path span.

    The span comes from ``basic_module_421`` and is floored at 1.
    """
    value_total = basic_module_413(outputs)
    per_output = value_total / len(outputs)
    per_block = basic_module_413(outputs) / max(basic_module_421(outputs), 1)
    return per_output, per_block

@sign_up_decorator(functions_path)
def basic_module_433(outputs):
    """Path block-height span (``basic_module_421``) divided by the number of outputs."""
    span = basic_module_421(outputs)
    return span / len(outputs)

In [None]:
def hasStr(address):
    """Report whether ``address.address_string`` can be read.

    Args:
        address: Object expected to expose an ``address_string`` attribute.

    Returns:
        True when reading the attribute succeeds, False when it raises.
    """
    try:
        address.address_string
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        return False
    return True

# Path length at which tracing stops; shorter finished paths are zero-padded to it.
_TRACE_PATH_LENGTH = 5
# Addresses with at least this many input + output transactions are blacklisted.
_TRACE_MAX_ADDRESS_TXES = 10000
# At most this many inputs of a transaction are followed backwards.
_TRACE_MAX_BRANCHES = 3
# A checkpoint pickle is written whenever the sample count is a multiple of this.
_TRACE_CHECKPOINT_EVERY = 1000


def _extend_with_features(row, funcs, *args):
    """Call every function in ``funcs`` with ``args`` and append its result(s) to ``row``."""
    for func in funcs:
        rs = func(*args)
        row.extend(rs if isinstance(rs, tuple) else [rs])


def trace(path, pb, label):
    """Extend ``path`` backwards through spent outputs and record finished paths.

    ``path[0]`` is the current tail. When the path has reached
    ``_TRACE_PATH_LENGTH`` outputs or the tail's transaction is a coinbase,
    one sample ``[rows, [label]]`` is appended to the module-level
    ``global_data``: one feature row per path output, built from the
    registered utxo / tx / address / path feature functions, left-padded with
    zero rows up to ``_TRACE_PATH_LENGTH``. Otherwise the function recurses
    into the spent outputs of the first ``_TRACE_MAX_BRANCHES`` inputs of the
    tail transaction.

    Relies on module-level state: ``global_data``, ``blacklist``, the four
    ``functions_*`` registries and the name ``file`` (used in the checkpoint
    path), all of which must be defined before the first call.

    Args:
        path: Non-empty list of outputs; new outputs are prepended.
        pb: Progress bar whose ``set_postfix`` is used for status display.
        label: Class label stored with every sample produced from this path.
    """
    tail_output = path[0]

    pb.set_postfix(length=len(path), step=0, data=len(global_data))

    # Tails whose address has no string form cannot be identified; drop them.
    if not hasStr(tail_output.address):
        return
    addr = tail_output.address.address_string
    pb.set_postfix(length=len(path), endpoint=addr, step=1, data=len(global_data))
    if addr in blacklist or tail_output.address.input_txes_count() + tail_output.address.output_txes_count() >= _TRACE_MAX_ADDRESS_TXES:
        blacklist.add(addr)
        return

    pb.set_postfix(length=len(path), endpoint=addr, step=2)
    tail_tx = tail_output.tx
    if len(path) >= _TRACE_PATH_LENGTH or tail_tx.is_coinbase:
        data = []
        global_data.append([data, [label]])
        for index, output in enumerate(path):
            row = []
            # utxo
            pb.set_postfix(length=len(path), endpoint=addr, step='utxo', data=len(global_data))
            _extend_with_features(row, functions_utxo, output)
            # tx
            pb.set_postfix(length=len(path), endpoint=addr, step='tx', data=len(global_data))
            _extend_with_features(row, functions_tx, output.tx)
            # addr
            pb.set_postfix(length=len(path), endpoint=addr, step='addr', data=len(global_data))
            _extend_with_features(row, functions_addr, output.address, output.tx.block_height)
            # path
            pb.set_postfix(length=len(path), endpoint=addr, step='path', data=len(global_data))
            _extend_with_features(row, functions_path, path[:index + 1])
            data.append(row)
        # Left-pad short paths so every sample has the same number of rows.
        while len(data) < _TRACE_PATH_LENGTH:
            data.insert(0, [0] * len(data[0]))
        if len(global_data) % _TRACE_CHECKPOINT_EVERY == 0:
            # `file` is the module-level name of the dataset being processed.
            with open('path/{}-{}.pkl'.format(file, len(global_data)), 'wb') as checkpoint:
                pickle.dump(global_data, checkpoint)
        return

    pb.set_postfix(length=len(path), endpoint=addr, step=3, data=len(global_data))
    for branch, _input in enumerate(tail_tx.inputs, start=1):
        trace([_input.spent_output] + path, pb, label)
        if branch == _TRACE_MAX_BRANCHES:
            break
            

# Dataset names: drop the 6-character "_p.txt" / "_n.txt" suffix and skip
# hidden files. Sorted so the processing order is the same on every run.
files = sorted({f[:-6] for f in os.listdir("addresses") if f[0] != '.'})

# Samples of the form [feature_rows, [label]], appended to by trace().
# NOTE(review): never reset between files, so each "<file>_final.pkl" also
# contains the samples of all previously processed files - confirm intended.
global_data = []

# Address strings that trace() refuses to expand.
blacklist = set()

# The loop variable must be the module-level name `file`: trace() reads it
# when building its checkpoint file names.
for file in files:
    # (file suffix, progress-bar description suffix, bar width, label)
    for suffix, kind, ncols, label in (('_p.txt', '-positive', 150, 1),
                                       ('_n.txt', '-negative', 200, 0)):
        with open('addresses/' + file + suffix, 'r') as f:
            lines = f.readlines()
        lines_tqdm = tqdm.tqdm(lines, desc=file + kind, ncols=ncols)
        for line in lines_tqdm:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            tx_hash, address_string = fields
            tx = chain.tx_with_hash(tx_hash)
            for _input in tx.inputs:
                # hasStr() guards inputs whose address has no string form;
                # such an input cannot match the listed address anyway.
                if hasStr(_input.address) and str(_input.address.address_string) == address_string:
                    trace([_input.spent_output], lines_tqdm, label=label)
                    break
    with open('path/{}_final.pkl'.format(file), 'wb') as output:
        pickle.dump(global_data, output)