In [1]:
#given a tree and pickled model, predict top N probabilities and classes
from functions import *

#test data: pull a small sample of rows from the BigQuery table named below
table = 'contract-explorer-233919.ethparis.functions3'
limit = 'LIMIT 100'  # spliced verbatim into the query text after the table name

sql = '''
SELECT *  FROM `{}`
 {}
'''.format(table, limit)

# NOTE(review): `client` is not defined in this cell; presumably it is provided
# by `from functions import *` -- confirm
df = client.query(sql, location='europe-west2').to_dataframe()

# NOTE(review): `target` is not read again in the visible cells; presumably the
# true label for each row -- confirm
target = df.hash
trees = df.tree  # passed to predict_model() further down

#unpickle processing pipeline and model (both are read as globals by predict_model)
# NOTE: unpickling can execute arbitrary code; only load pickle files you trust
rf = pd.read_pickle('pickle/rf.p')
pipe = pd.read_pickle('pickle/pipe.p')

# %%writefile servemodel.py


In [3]:

def predict_model(trees, N=3):
    '''
    Predict the N most probable classes for every tree.

    Input:
        trees: JSON trees to score, handed to Preprocess().fit_transform and
            then to the unpickled `pipe`.
        N: how many top predictions to keep per row (default 3). Must be >= 1;
            a value above the number of classes keeps every class.
    Output: pd.DataFrame with default integer column labels, one row per tree:
        the 'addr' and 'hash' columns of the global `df`, then the top N
        probabilities (highest first), then the matching top N classes.
    Raises: ValueError if N < 1.

    NOTE: reads the notebook globals `pipe`, `rf` and `df`. The rows of `df`
    are attached positionally, so `trees` must line up with `df` row for row.
    '''
    # -N slicing misbehaves for N <= 0 (N=0 would silently keep *all* classes)
    if N < 1:
        raise ValueError('N must be a positive integer, got {!r}'.format(N))

    #run data preprocessing pipeline
    pp = Preprocess().fit_transform(trees)
    result_test = pipe.transform(pp)

    #make prediction; column indices of the N highest probabilities, best first
    probs = np.asarray(rf.predict_proba(result_test))
    indices = np.argsort(probs, axis=1)[:, -N:][:, ::-1]

    #look up probabilities and class labels through the same indices so the two
    #stay aligned without sorting a second copy of the matrix
    top_n_probs = np.take_along_axis(probs, indices, axis=1)
    top_n_classes = rf.classes_[indices]

    #identifying columns come from the global query result, not from `trees`
    meta = df[['addr', 'hash']].values
    return pd.DataFrame(np.concatenate((meta, top_n_probs, top_n_classes), axis=1))


# keep only the single most probable class per row (N=1)
result = predict_model(trees, N=1)

In [4]:
# save the predictions to output.csv (relative path, default to_csv options)
result.to_csv('output.csv')

In [5]:
# show the predictions table as the cell output
result

Unnamed: 0,0,1,2,3
0,0x02c740252BdC9a08Cbd5DCca17DC8934F3a53bB5,0x03959bb7,0.268007,0xf2fde38b
1,0x02c740252BdC9a08Cbd5DCca17DC8934F3a53bB5,0xa39a45b7,0.144802,0x88975198
2,0x02c740252BdC9a08Cbd5DCca17DC8934F3a53bB5,0xad68ebf7,0.6875,0xad68ebf7
3,0x654Cc9EC854E118EF050a01B40c14716c22c4fcC,0x2e1a7d4d,0.145833,0x3ccfd60b
4,0x654Cc9EC854E118EF050a01B40c14716c22c4fcC,0x41c0e1b5,0.791667,0x41c0e1b5
5,0x84EF4b2357079CD7A7C69fD7a37cd0609a679106,0x013cf08b,0.0833333,0x3e239e1a
6,0x84EF4b2357079CD7A7C69fD7a37cd0609a679106,0x0c3b7b96,0.402679,0xef78d4fd
7,0x84EF4b2357079CD7A7C69fD7a37cd0609a679106,0x0e708203,0.394058,0x8da5cb5b
8,0x84EF4b2357079CD7A7C69fD7a37cd0609a679106,0x149acf9a,0.0829334,0xde28fc1d
9,0x84EF4b2357079CD7A7C69fD7a37cd0609a679106,0x1f2dc5ef,0.708333,0x1f2dc5ef
