In [1]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before
# each cell executes, so edits to project modules are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import json
import pandas as pd
from multithread_processing.base_job import BaseJob
from typing import Dict, List

from databases.mongodb import MongoDB
from databases.mongodb_entity import MongoDBEntity

# Database handles, both built with their default constructor arguments.
# NOTE(review): neither handle is referenced in the cells visible here —
# presumably they are needed by other cells; confirm before removing.
db = MongoDB()
entity_db = MongoDBEntity()

In [5]:
class EncodeLendingWallets(BaseJob):
    """Batch job that one-hot encodes each wallet's lending pools and saves them as CSV.

    Each work item is read as a mapping with an ``'address'`` entry and a
    ``'lendingPools'`` container. The resulting vectors are accumulated in
    ``self.wallets_encoded`` (address -> vector) and written to ``filename``
    by ``_end``.
    """

    def __init__(self, wallets_list: list, filename: str,
                 max_workers=16, batch_size=100):
        """Prepare the encoder state and pass the work list on to ``BaseJob``.

        Args:
            wallets_list: Wallet records to encode; forwarded to ``BaseJob`` as
                ``work_iterable``.
            filename: Path of the CSV file written by ``_end``.
            max_workers: Forwarded unchanged to ``BaseJob``.
            batch_size: Forwarded unchanged to ``BaseJob``.
        """
        self.filename = filename
        # Pool name -> position in the one-hot vector. This mapping is the single
        # source of truth for both the vector width and the CSV column order;
        # positions are expected to be the contiguous range 0..len-1.
        self.onehot_mapper = {'aave': 0, 'venus': 1, 'trava': 2, 'cream': 3, 'valas': 4, 'compound': 5, 'geist': 6}  # bsc: venus, trava, cream, valas

        # Wallet address -> one-hot vector (one 0/1 entry per pool in the mapper).
        self.wallets_encoded: Dict[str, List[int]] = dict()

        super().__init__(work_iterable=wallets_list,
                         max_workers=max_workers,
                         batch_size=batch_size)

    def _execute_batch(self, works):
        """Encode every wallet in ``works`` and store it in ``self.wallets_encoded``.

        Args:
            works: Iterable of wallet records, each providing ``'address'`` and
                ``'lendingPools'``. A missing key raises ``KeyError``.
        """
        # Derived from the mapper so that adding a pool cannot overflow the vector.
        vector_size = len(self.onehot_mapper)
        for wallet in works:
            address = wallet['address']
            pools = wallet['lendingPools']
            onehot_vector = [0] * vector_size
            for pool, onehot_position in self.onehot_mapper.items():
                # Plain membership test; the container type of ``lendingPools``
                # is not visible here (presumably a list or a dict keyed by pool name).
                if pool in pools:
                    onehot_vector[onehot_position] = 1
            # A later record with the same address overwrites the earlier one.
            self.wallets_encoded[address] = onehot_vector

    def _end(self):
        """Run the parent's ``_end`` and then write the encoded wallets to ``self.filename``."""
        super()._end()
        # save to csv
        # Order the column names by their vector position so the header always
        # matches the layout produced in ``_execute_batch``.
        columns = sorted(self.onehot_mapper, key=self.onehot_mapper.get)
        df = pd.DataFrame.from_dict(self.wallets_encoded,
                                    orient='index',
                                    columns=columns)
        df.to_csv(self.filename)

In [6]:
# Input: JSON file expected to hold the list of wallet records to encode.
WALLETS_JSON_PATH = '.data/exist_lending_wallets_y.json'
# Output: CSV file that receives one one-hot encoded row per wallet address.
ENCODED_CSV_PATH = '.data/encoded_lending_wallets_y.csv'

# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default locale encoding.
with open(WALLETS_JSON_PATH, 'r', encoding='utf-8') as f:
    lending_wallets = json.load(f)

job = EncodeLendingWallets(wallets_list=lending_wallets,
                           filename=ENCODED_CSV_PATH)
job.run()

[08-10-2023 02:48:57 +07] [INFO] [BatchWorkExecutor] - Started work. Items to process: 689.
[08-10-2023 02:48:57 +07] [INFO] [BatchWorkExecutor] - 100 items processed. Progress is 14%.
[08-10-2023 02:48:57 +07] [INFO] [BatchWorkExecutor] - 200 items processed. Progress is 29%.
[08-10-2023 02:48:57 +07] [INFO] [BatchWorkExecutor] - 300 items processed. Progress is 43%.
[08-10-2023 02:48:57 +07] [INFO] [BatchWorkExecutor] - 400 items processed. Progress is 58%.
[08-10-2023 02:48:57 +07] [INFO] [BatchWorkExecutor] - 500 items processed. Progress is 72%.
[08-10-2023 02:48:57 +07] [INFO] [BatchWorkExecutor] - 600 items processed. Progress is 87%.
[08-10-2023 02:48:57 +07] [INFO] [BatchWorkExecutor] - 689 items processed. Progress is 100%.
[08-10-2023 02:48:57 +07] [INFO] [BatchWorkExecutor] - Finished work. Total items processed: 689. Took 0:00:00.023305.
