# Select top-N assets with good indicators
The objective is to calculate the standard indicators for a top N number of assets. Then use the indicators to decide on a potentially significant set of assets to consider for the portfolio. Thereafter, apply the MPT monte carlo algorithm to construct a weigted portfolio. 

In [1]:
'''
    WARNING CONTROL to display or ignore all warnings
'''
import warnings; warnings.simplefilter('ignore') #switch betweeb 'default' and 'ignore'
import traceback

''' Set debug flag to view extended error messages; 
    else set it to False to turn off debugging mode '''
debug = True

## Initialize the classes

In [83]:
import os
import sys
from datetime import datetime, date, timedelta
from pyspark.sql import functions as F

sys.path.insert(1,"/home/nuwan/workspace/rezaware/")
from mining.modules.assets.etp import dailyTopN as topN
from mining.modules.assets.etp import performIndex as idx
from utils.modules.ml.timeseries import rollingstats as stat

''' restart initiate classes '''
if debug:
    import importlib
    topN = importlib.reload(topN)
    idx = importlib.reload(idx)
    stat = importlib.reload(stat)

__desc__ = "analyze crypto market capitalization time series data"
clsTopN = topN.WeightedPortfolio(desc=__desc__)
clsIdx = idx.Portfolio(desc=__desc__)
clsStat = stat.RollingStats(desc=__desc__)
print("\nClass initialization and load complete!")

All functional DAILYTOPN-libraries in ETP-package of ASSETS-module imported successfully!
All functional PERFORMINDEX-libraries in ETP-package of ASSETS-module imported successfully!
All packages in utils ml timeseries RollingStats imported successfully!
sparkNoSQLwls Class initialization complete
sparkNoSQLwls Class initialization complete
execSession Class initialization complete
performIndex Class initialization complete
execSession Class initialization complete
dailyTopN Class initialization complete
sparkNoSQLwls Class initialization complete
execSession Class initialization complete
performIndex Class initialization complete
execSession Class initialization complete

Class initialization and load complete!


## Select topN assets

### Read top N mcap assets from tip sql db

Set the following parameters to select the mcap data from the database
* ```_num_assets``` (integer) limits the number of asset count
* ```_mcap_val_lb```(decimal) limits the asset selection by mcap_value
* ```_date``` (datetime) selects assets with values for that day
* ```_table```(string) by default is 'warehouse.mcap_past' where mcap daily data is stored

Extends the ```utils/etl/load/sparkDBwls``` package to read the data from database table


### Construct a dict with selected assets
The dictionary serves as an input to the ```mining/modules/assets/etp/performIndex``` package to compuiting the index values.

[Common metrics for assessing asset strength](https://cointelegraph.com/learn/the-most-common-crypto-metrics-a-beginners-guide)

In [85]:
_num_assets=23
_mcap_val_lb=100000000.0
_date=date(2023,3,30)
_table='warehouse.mcap_past'
# _idx_types=['adx']
# _idx_types=['adx','rsi','mfi','macd']
_idx_types = {
    "ADX": {"DATEATTR":'mcap_date',"VALUEATTR":'mcap_log_ror',
            "WINLENGTH":14,"WINUNIT":'DAY'},
#     "RSI":{"DATEATTR":'price_date',"VALUEATTR":'price_log_ror',
#            "WINLENGTH":7,"WINUNIT":'DAY'},
#     "MFI":{"DATEATTR":'mcap_date',"VALUEATTR":'mcap_log_ror',
#            "WINLENGTH":7,"WINUNIT":'DAY'},
#     "MACD":{"DATEATTR":'price_date',"VALUEATTR":'price_log_ror',
#             "WINLENGTH":20,"WINUNIT":'DAY'},
}
kwargs={
    "TABLENAME" :_table,
    "ASSETCOUNT":_num_assets,
    "INDEXTYPELIST":_idx_types,
    "STANDARDIZE" :'minmax',
    "COLPREFIX" :'scaled',
#     "WINLENGTH" :20,
#     "WINUNIT" :'DAY',
    "PCALOWERBOUND":0.7,
}

_portfolio=clsTopN.select_top_assets(
    mcap_date=_date,
    mcap_value_lb=_mcap_val_lb,
    indicators=_idx_types,
    **kwargs,
)

_portfolio[:5]

Unnamed: 0,mcap_past_pk,uuid,asset_name,mcap_date,mcap_value,price_date,price_value,ADX
0,24743,64536da74a6e0a5e3a670f9c,bitcoin,2023-03-30,560638000000.0,2023-03-30T00:00:00,29021.35212,0.078149
0,14714,6453755a4a6e0a5e3a691061,chainlink,2023-03-30,3884469000.0,2023-03-30T00:00:00,7.507228,0.004053
0,10477,6453d0d54a6e0a5e3a804c6a,the_open_network,2023-03-30,3196606000.0,2023-03-30T00:00:00,2.167909,0.287808
0,40236,64536dbf4a6e0a5e3a67180e,bitcoin_cash,2023-03-30,2412975000.0,2023-03-30T00:00:00,124.84391,0.149247
0,46795,6453664a4a6e0a5e3a652402,aptos,2023-03-30,2111350000.0,2023-03-30T00:00:00,11.76164,0.337051


In [76]:
from pyspark.sql import functions as F
clsTopN.data\
            .select(F.col('asset_name'),F.col('price_date'),F.col('price_log_ror'),
                    F.col('mcap_date'),F.col('mcap_log_ror'))\
            .show(n=50)

+-----------------+-------------------+------------------+-------------------+------------------+
|       asset_name|         price_date|     price_log_ror|          mcap_date|      mcap_log_ror|
+-----------------+-------------------+------------------+-------------------+------------------+
|          bitcoin|2023-03-30 00:00:00|0.0221930809804374|2023-03-30 00:00:00|0.0253066726210866|
|        chainlink|2023-03-30 00:00:00|0.0347777497155855|2023-03-30 00:00:00|0.1089231709500041|
| the_open_network|2023-03-30 00:00:00|0.0207418454174259|2023-03-30 00:00:00|0.0212451815658178|
|     bitcoin_cash|2023-03-30 00:00:00|0.0047760624132737|2023-03-30 00:00:00|0.0045689860826454|
|            aptos|2023-03-30 00:00:00|0.0024116120519500|2023-03-30 00:00:00|0.0054086115476947|
|           hedera|2023-03-30 00:00:00|0.1137924980407853|2023-03-30 00:00:00|0.1051859223223360|
|           cronos|2023-03-30 00:00:00|0.0111506675327173|2023-03-30 00:00:00|0.0098774246928719|
|    near_protocol|2

In [22]:
from pyspark.sql import functions as F
from pyspark.sql.functions import pandas_udf
# from pyspark.sql.functions import PandasUDFType
from pyspark.sql.types import DoubleType, StructField

def exp_ma(df, group_col='asset_name', sort_col='mcap_date',**kwargs):
    schema = (df.select('*')
        .schema.add(StructField('ewma', DoubleType())))

    @pandas_udf(schema, F.PandasUDFType.GROUPED_MAP)
    def ema(pdf):
        pdf['ewma'] = pdf['mcap_value'].ewm(span=5, min_periods=1).mean()
        return pdf

    return df.groupby('asset_name').apply(ema)

# df = spark.createDataFrame(
#     [("a", 1, 1), ("a", 2, 3), ("a", 3, 3), ("b", 1, 10), ("b", 8, 3), ("b", 9, 0)], 
#     ("name", "date", "count")
# )

exp_ma(clsTopN.data).show()

[Stage 2179:>                                                       (0 + 1) / 1]

+------------+--------------------+-----------+----------------+------------+----------------+--------+-------------------+--------------------+------------------+------------------+-------------------+--------------------+---------+------------------+------------------+-------------------+--------------------+-------------+--------------------+----------+--------------------+--------------------+-----------+--------------------+-------------+-------------------+
|mcap_past_pk|                uuid|data_source|      asset_name|asset_symbol|    alt_asset_id|currency|         price_date|         price_value|     price_log_ror|    price_simp_ror|          mcap_date|          mcap_value|mcap_rank|      mcap_log_ror|     mcap_simp_ror|        volume_date|         volume_size|volume_change|          created_dt|created_by|        created_proc|         modified_dt|modified_by|       modified_proc|deactivate_dt|               ewma|
+------------+--------------------+-----------+----------------+

                                                                                

In [6]:
from pyspark.sql import functions as F

print(clsTopN.index.select(F.col('asset_name'),F.col('1-PC[0]'),F.col('PC[0]'))\
      .sort('1-PC[0]',ascending=False).show())

AttributeError: 'DataFrame' object has no attribute 'select'

## Write portfolio to mongodb

In [121]:
kwargs={
    "DESTINDBNAME":'tip-portfolio',
    "COLLPREFIX" : 'cherry_picked',
    "UIDSLIST" : ['uuid'],
    "DATECOLNAME":'asset.mcap.date',
}
_saved_port = clsTopN.write_asset_picks_to_db(
    portfolio_data=_portfolio,
    cols_dict={},
    **kwargs,
)
_saved_port[:1]

[[{'mcap.db.fk': 53843,
   'uuid': '63976ea07cc473c589257bce',
   'asset.name': 'matic',
   'asset.mcap.date': '2022-01-27T00:00:00',
   'asset.mcap.value': 12212915722.4229,
   'index.adx': -0.39521888110019904,
   'index.sharp': 16.42596803177328,
   'index.rsi': 0.2913695270171328,
   'index.mfi': 0.2913695270171328,
   'index.beta': -0.7605901418779902,
   'index.scaled.beta': 0.1883,
   'index.scaled.adx': 0.0,
   'index.scaled.mfi': 0.2732,
   'index.scaled.rsi': 0.2732,
   'index.scaled.sharp': 0.1905,
   'index.pc.0': -0.2754386955070266,
   'index.pc.1': -0.3140925697767417,
   'index.scaled.pc.0': 0.282,
   'index.scaled.pc.1': 0.9453,
   'index.scaled.1-pc.0': 0.718,
   'index.scaled.1-pc.1': 0.05469999999999997,
   'audit.mod.by': 'NUWAN',
   'audit.mod.dt': '2023-04-04T18:17:22',
   'audit.mod.proc': 'dailyTopN-function <write_asset_picks_to_db>',
   '_id': ObjectId('642bf93240bd6d8d5b8ac22a')},
  {'mcap.db.fk': 380,
   'uuid': '6397be217cc473c58945e15c',
   'asset.name': 