# Derive Top N Portfolio

In [1]:
# WARNING CONTROL: display or ignore all warnings.
# Switch the filter action below between 'default' and 'ignore'.
import warnings
import traceback

warnings.simplefilter('ignore')

# Debug flag: True shows extended error messages; set it to False to turn off debugging mode.
debug = True

In [95]:
import os
import sys
from datetime import datetime, date, timedelta

# Root of the rezaware checkout. Set the REZAWARE_HOME environment variable to
# point at a different location; when it is unset the original developer path is used.
REZAWARE_HOME = os.environ.get("REZAWARE_HOME", "/home/nuwan/workspace/rezaware/")
# Register the path only once so re-running this cell does not keep growing sys.path.
if REZAWARE_HOME not in sys.path:
    sys.path.insert(1, REZAWARE_HOME)
import rezaware as reza
from utils.modules.etl.load import sparkDBwls as sdb
# from utils.modules.etl.transform import sparkCleanNRich as scne
from mining.modules.assets.etp import dailyTopN as topN
# from utils.modules.ml.timeseries import rollingstats as stats

# In debug mode, reload the project modules so edits to them are picked up
# without restarting the kernel. `debug` is set in the first cell of the notebook.
if debug:
    import importlib
    reza = importlib.reload(reza)
    topN = importlib.reload(topN)
    sdb = importlib.reload(sdb)
#     scne = importlib.reload(scne)
#     stats= importlib.reload(stats)

# Description string handed to each class constructor below.
__desc__ = "analyze crypto market capitalization time series data"
clsSDB = sdb.SQLWorkLoads(desc=__desc__)
# clsSCNR=scne.Transformer(desc=__desc__)
clsMPT = topN.WeightedPortfolio(desc=__desc__)
# clsStat=stats.RollingStats(desc=__desc__)
# optional - if not specified class will use the default values
# prop_kwargs = {"WRITE_TO_TMP":True,   # necessary to emulate the etl dag
#               }
print("\nClass initialization and load complete!")

All functional APP-libraries in REZAWARE-package of REZAWARE-module imported successfully!
All functional DAILYTOPN-libraries in ETP-package of ASSETS-module imported successfully!
All functional SPARKDBWLS-libraries in LOAD-package of ETL-module imported successfully!
sparkNoSQLwls Class initialization complete
dailyTopN Class initialization complete

Class initialization and load complete!


## Read data from mcap_past
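We query `warehouse.mcap_past` for assets whose market capitalization (`mcap_value`) is greater than 1.0 million and whose `log_ror` is not null, within the selected date range. Any missing values are imputed with the mean value.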

In [96]:
# Date window (inclusive, via SQL BETWEEN) and source table for the query.
_from_date = '2022-01-05'
_to_date = '2022-01-10'
_db_name = "warehouse.mcap_past"

# _query = "select * from warehouse.mcap_past "+\
#         f"where mcap_date between '{_from_date}' and '{_to_date}' "+\
#         f"and mcap_value > 1000000 and asset_name in "+\
#         "('brg_x','_crdn','avme','atri','ethix','hoge','xpx','wabi','dmg','mintme','chart')"
_query =f"select * from {_db_name} "+\
        f"where mcap_date between '{_from_date}' and '{_to_date}' "+\
        f"and mcap_value > 1000000 and log_ror is not null"

# Optional keyword overrides for read_ror, defined here so the cell runs on a
# fresh kernel. The original passed `_kwargs`, which is only assigned in the
# later "Write MPT to MongoDB" cell and therefore raised NameError under
# Restart & Run All.
# NOTE(review): an empty dict assumes read_ror needs no extra options — confirm.
_read_kwargs = {}

mcap_sdf = clsMPT.read_ror(select=_query,**_read_kwargs)

print("Loaded %d rows and %d columns" % (mcap_sdf.count(),len(mcap_sdf.columns)))

Wait a moment, retrieving data ...
Loaded 10908 rows and 17 columns


In [97]:
from pyspark.sql import functions as F

# Preview the key columns of the loaded frame, ordered by date, first 10 rows.
_preview_cols = ['mcap_past_pk', 'mcap_date', 'asset_name', 'mcap_value', 'log_ror']
_preview_sdf = mcap_sdf.select(*[F.col(_name) for _name in _preview_cols])
_preview_sdf.orderBy(F.col('mcap_date')).show(n=10)

+------------+-------------------+----------+--------------------+------------------+
|mcap_past_pk|          mcap_date|asset_name|          mcap_value|           log_ror|
+------------+-------------------+----------+--------------------+------------------+
|       62825|2022-01-05 00:00:00|      dobo|45051347.95446150...|0.7174060405566748|
|      206997|2022-01-05 00:00:00|       eba|3266653.843792310...|0.7127811661859375|
|      129876|2022-01-05 00:00:00|      dnxc|10688172.73902660...|0.7299033206850649|
|       47779|2022-01-05 00:00:00|      dodo|257791090.5306190...|0.7139591022686875|
|       73653|2022-01-05 00:00:00|       dhv|2469812.425692140...|0.7306216583929578|
|       65439|2022-01-05 00:00:00|       dit|1222199.880055260...|0.6843672495216264|
|       24607|2022-01-05 00:00:00|      dinu|4889941.152675570...|0.7311999041866360|
|      102635|2022-01-05 00:00:00|      digg|10770157.91034280...|0.6906054531057350|
|       42340|2022-01-05 00:00:00|      doex|7807522.4

## Weighted Portfolio

In [101]:
# Maps the role names passed to clsMPT (as `cols_dict`) onto the column names
# used in mcap_sdf and in the resulting portfolio records.
_cols={
    "PRIMARYKEY":'mcap_past_pk',
    "NAMECOLUMN":'asset_name',
    "DATECOLUMN":'mcap_date',
    "NUMCOLUMN" :'log_ror',
    "MCAPCOLUMN":'mcap_value',
    "WEIGHTCOLUMN":'weights',
    "MCAPSOURCE":'source',
}
# Optional keyword overrides for get_weighted_mpt, defined here so the cell runs
# on a fresh kernel. The original passed `_kwargs`, which is only assigned in the
# later "Write MPT to MongoDB" cell and therefore raised NameError under
# Restart & Run All.
# NOTE(review): an empty dict assumes get_weighted_mpt needs no extra options — confirm.
_mpt_kwargs = {}
_l_exp_wts,_cols_dict=clsMPT.get_weighted_mpt(
    data=mcap_sdf,
    cols_dict=_cols,
    topN=3,
    size=5,
    **_mpt_kwargs,
)
# Display the first two portfolio records as the cell output.
_l_exp_wts[:2]

                                                                                

[{'source': 'warehouse.mcap_past',
  'mcap_past_pk': [91391, 124438, 78462],
  'mcap_date': datetime.datetime(2022, 1, 9, 0, 0),
  'asset_name': ['btcs', 'nabox', 'yaxis'],
  'log_ror': [Decimal('2.9321703953454180'),
   Decimal('1.9620722641637465'),
   Decimal('1.4506834179632306')],
  'weights': [0.4201956739423498, 0.5430690045963892, 0.03673532146126092],
  'mcap_value': [Decimal('23756320.2128301000000000'),
   Decimal('66498522.3461270000000000'),
   Decimal('3036867.5895900100000000')]},
 {'source': 'warehouse.mcap_past',
  'mcap_past_pk': [130969, 111435, 62127],
  'mcap_date': datetime.datetime(2022, 1, 8, 0, 0),
  'asset_name': ['l2', 'falcx', 'dana'],
  'log_ror': [Decimal('1.5337095512357850'),
   Decimal('1.3188759637834600'),
   Decimal('1.2139873905404268')],
  'weights': [0.7107184070065857, 0.1185356605340137, 0.1707459324594007],
  'mcap_value': [Decimal('10755738.7793784000000000'),
   Decimal('2651804.8528023700000000'),
   Decimal('26502663.9578732000000000')]}]

In [100]:
# Re-inspect the first two weighted-portfolio records held in `_l_exp_wts`.
# NOTE(review): this cell's execution count (In [100]) precedes that of the cell
# above (In [101]), so the saved output below appears to come from an earlier
# computation — re-run to refresh it.
_l_exp_wts[:2]

[{'source': 'warehouse.mcap_past',
  'mcap_past_pk': [19518, 171390, 17888],
  'mcap_date': datetime.datetime(2022, 1, 9, 0, 0),
  'asset_name': ['btc', 'bnb', 'avax'],
  'mcap_value': [Decimal('799651753161.3470000000000000'),
   Decimal('75445470567.1973000000000000'),
   Decimal('21023293159.5510000000000000')],
  'weights': [0.6205951435024774, 0.2770582324104319, 0.10234662408709076]},
 {'source': 'warehouse.mcap_past',
  'mcap_past_pk': [19517, 171389, 17887],
  'mcap_date': datetime.datetime(2022, 1, 8, 0, 0),
  'asset_name': ['btc', 'bnb', 'avax'],
  'mcap_value': [Decimal('797636593191.0020000000000000'),
   Decimal('77120090969.0475000000000000'),
   Decimal('21798285422.6966000000000000')],
  'weights': [0.8085400994030512, 0.18539785043999601, 0.006062050156952884]}]

## Write MPT to MongoDB

* Collection name = "mpt."+date(YYYY-MM-DD)
* document structure: \_id, date, asset, mcap.value, mcap.weight, mcap.ror

In [99]:
# Keyword options for the write step; judging by the key names these are the
# destination database name and the collection-name prefix.
_kwargs = dict(
    DESTINDBNAME='tip-daily-mpt',
    COLLPREFIX='mpt',
)
# Write the weighted portfolios through clsMPT and report how many items came back.
mpt_list_ = clsMPT.write_mpt_to_db(mpt_data=_l_exp_wts, cols_dict=_cols, **_kwargs)
print("Upsert %d documents" % len(mpt_list_))

Total 3 documents, successful insert count = 0 & modify count = 3
Total 3 documents, successful insert count = 0 & modify count = 3
Total 3 documents, successful insert count = 0 & modify count = 3
Total 3 documents, successful insert count = 0 & modify count = 3
Total 3 documents, successful insert count = 0 & modify count = 3
Total 3 documents, successful insert count = 0 & modify count = 3
Upsert 6 documents
