# Derive Top N Portfolio

In [70]:
# WARNING CONTROL: display or ignore all warnings.
# Switch the filter action between 'default' and 'ignore'.
import warnings
import traceback

warnings.simplefilter('ignore')

# Debug flag: True shows extended error messages; set it to False to turn
# debugging mode off.
debug = True

In [121]:
import os
import sys
from datetime import datetime, date, timedelta

# Root folder of the rezaware checkout. It can be overridden with the
# REZAWARE_HOME environment variable so the notebook is not tied to a single
# workstation; the previously hard-coded location remains the fallback.
_rezaware_home = os.environ.get("REZAWARE_HOME", "/home/nuwan/workspace/rezaware/")
# Guard the insert so that re-running this cell does not keep stacking the
# same entry onto sys.path.
if _rezaware_home not in sys.path:
    sys.path.insert(1, _rezaware_home)

import rezaware as reza
from utils.modules.etl.load import sparkDBwls as sdb
from utils.modules.etl.transform import sparkCleanNRich as scne
from wrangler.modules.assets.etl import dataPrep as prep
# from utils.modules.ml.timeseries import rollingstats as stats

# In debug mode reload the project modules so that source edits are picked up
# without restarting the kernel. `debug` is set in the warning-control cell.
if debug:
    import importlib
    reza = importlib.reload(reza)
    sdb = importlib.reload(sdb)
    scne = importlib.reload(scne)
    prep = importlib.reload(prep)
#     stats= importlib.reload(stats)

# Description string handed to every class instantiated below.
__desc__ = "analyze crypto market capitalization time series data"
# clsSDB = sdb.SQLWorkLoads(desc=__desc__)
clsSCNR = scne.Transformer(desc=__desc__)
# clsStat=stats.RollingStats(desc=__desc__)
clsPrep = prep.RateOfReturns(desc=__desc__)
# optional - if not specified class will use the default values
# prop_kwargs = {"WRITE_TO_TMP":True,   # necessary to emulate the etl dag
#               }
print("\nClass initialization and load complete!")

All functional APP-libraries in REZAWARE-package of REZAWARE-module imported successfully!
All functional SPARKDBWLS-libraries in LOAD-package of ETL-module imported successfully!
All functional SPARKCLEANNRICH-libraries in TRANSFORM-package of ETL-module imported successfully!
All functional DATAPREP-libraries in ETL-package of ASSETS-module imported successfully!
sparkNoSQLwls Class initialization complete
dataPrep Class initialization complete

Class initialization and load complete!


## Read data from mcap_past
We apply a query to select assets with mcap > 10,000 (the `mcap_value > 10000` condition in the query below). Any missing values are imputed with the mean value.

In [134]:
# Date window used both inside the SQL text and in the reader kwargs.
_from_date = '2022-01-01'
_to_date = '2022-01-31'

# Earlier form of the query, without the market-cap floor:
# _query = "select * from warehouse.mcap_past "+\
#         f"where mcap_date >= '{_from_date}' and "+\
#         f"mcap_date <= '{_to_date}'"

# Adjacent literals are joined by the parser, so this is the same single
# query string as the explicit "+" concatenation.
_query = (
    "select * from warehouse.mcap_past wmp "
    f"where wmp.mcap_date between '{_from_date}' and '{_to_date}' "
    "and wmp.mcap_value > 10000 "
)

# Options forwarded to read_n_clean_mcap; their exact effect is defined by
# the dataPrep module, which is not shown in this notebook.
_kwargs = {
    "TABLENAME": 'warehouse.mcap_past',
    "COLUMN": 'mcap_date',
    "FROMDATETIME": _from_date,
    "TODATETIME": _to_date,
    "PARTITIONS": 2,
    "AGGREGATE": 'avg',
    "LOGBASE": '10',
    "PIVCOLUMNS": [
        'dxd', 'sofi', 'wsn', 'xmx', 'uqc', 'btr', 'unic', 'nex', 'noia',
        'hanu', 'aca', 'bbs', 'xvs', 'pnd', 'shake', 'stpl', 'dtx', 'tethys',
        'kyoko', 'boba', 'nlife', 'rare', 'eved', 'yfl', 'fkx', 'flixx',
        'drk', 'meto', 'glide', 'shr', 'tetu', 'mft', 'cmerge', 'shmn', 'tronpad',
    ],
}

# print(clsSpark.dbSchema)
mcap_sdf = clsPrep.read_n_clean_mcap(query=_query, **_kwargs)
# mcap_sdf = clsROR.read_n_clean_mcap(**_kwargs)

# Report the size of the loaded dataframe.
print("Loaded %d rows and %d columns" % (mcap_sdf.count(), len(mcap_sdf.columns)))

Wait a moment, retrieving data ...


[Stage 1869:>                                                       (0 + 1) / 1]

Loaded 74300 rows and 17 columns


                                                                                

In [135]:
from pyspark.sql import functions as F

# Order the rows by date and then market-cap value, and keep only the rows
# that carry a primary key.
mcap_sdf = (
    mcap_sdf
    .sort(F.col('mcap_date'), F.col('mcap_value'))
    .filter(F.col('mcap_past_pk').isNotNull())
)

# Preview the identifying columns of the first five rows.
mcap_sdf.select('mcap_past_pk', 'mcap_date', 'asset_name', 'mcap_value').show(n=5)

+------------+-------------------+----------+--------------------+
|mcap_past_pk|          mcap_date|asset_name|          mcap_value|
+------------+-------------------+----------+--------------------+
|       49194|2022-01-01 00:00:00|       sas|10621.17568446250...|
|      169510|2022-01-01 00:00:00|      edao|10948.90171404940...|
|       39374|2022-01-01 00:00:00|      scho|12262.57486049260...|
|       17823|2022-01-01 00:00:00|     rigel|13036.58566444630...|
|      156450|2022-01-01 00:00:00|      brtr|13346.88083840520...|
+------------+-------------------+----------+--------------------+
only showing top 5 rows



[Stage 1877:>                                                       (0 + 1) / 1]                                                                                

## Compute LogROR for all assets

In [137]:
# Rate-of-return type to compute.
_ror = 'NATLOG'

# Output column name for each supported type; any other type leaves the
# column name unset (None).
_ror_col = {'NATLOG': 'log_ror', 'SIMP': 'simp_ror'}.get(_ror)

# Names of the helper and result columns handed to get_ror.
_kwargs = {
    "PREVALCOLNAME": 'mcap_lag',
    "DIFFCOLNAME": 'mcap_diff',
    "RORCOLNAME": _ror_col,
}

# get_ror hands back two values: the resulting dataframe and the name of the
# ROR column, which replaces the value chosen above.
_mcap_log_ror, _ror_col = clsPrep.get_ror(
    data=mcap_sdf,
    ror_type=_ror,
    num_col="mcap_value",
    part_col='asset_name',
    date_col='mcap_date',
    **_kwargs,
)

## Write ROR data to DB

In [138]:
# Columns to persist: the key and descriptive columns plus the ROR column.
_upsert_cols = _mcap_log_ror.select(
    'mcap_past_pk',
    'asset_name',
    'mcap_date',
    'mcap_value',
    F.col(_ror_col),
)
# Rows without a ROR value are not written.
_upsert_sdf = _upsert_cols.filter(F.col(_ror_col).isNotNull())
_upsert_sdf.show(n=5)

# Write the rows to the database; the returned value is reported as the
# number of upserted records.
_records = clsPrep.write_data_to_db(
    data=_upsert_sdf,
#     **kwargs,
)
print("Upserted %d records" % _records)

                                                                                

+------------+----------+-------------------+--------------------+--------------------+
|mcap_past_pk|asset_name|          mcap_date|          mcap_value|             log_ror|
+------------+----------+-------------------+--------------------+--------------------+
|       91384|      btcs|2022-01-02 00:00:00|2785102.132352710...| 0.05284004754788873|
|      202976|     sngls|2022-01-02 00:00:00|155996.7475973480...|0.059862920030564884|
|      120739|      shdw|2022-01-02 00:00:00|364515.8216135790...| 0.23306536690933657|
|       67523|      sybc|2022-01-02 00:00:00|79187.09948750640...| 0.38644877160434254|
|      153610|     scriv|2022-01-02 00:00:00|122004.6214330040...|  0.3963025935129252|
+------------+----------+-------------------+--------------------+--------------------+
only showing top 5 rows

Validating upsert attributes and parameters ...


                                                                                

Wait a moment, writing data to postgresql tip database ...


[Stage 1996:>                                                       (0 + 1) / 1]

Upserted 71034 records


                                                                                

# DEPRECATED

## Weighted Portfolio

In [11]:
# DEPRECATED cell: kept for reference.
# NOTE(review): `clsMPT` is not instantiated anywhere in this notebook; the
# class that provides get_weighted_mpt must be imported and created before
# this cell can run on a fresh kernel.

# Column-role mapping handed to get_weighted_mpt.
_cols = {
    "NAMECOLUMN": 'asset_name',
    "DATECOLUMN": 'mcap_date',
    # `_log_col` is never defined in this notebook; the ROR column name is
    # held in `_ror_col` (returned by clsPrep.get_ror).
    "RORCOLUMN": _ror_col,
    "MCAPCOLUMN": 'mcap_value',
    "WEIGHTCOLUMN": 'weights',
}
# NOTE(review): `_kwargs` here is whatever the ROR cell last assigned
# (PREVALCOLNAME / DIFFCOLNAME / RORCOLNAME) - confirm that is intended.
_l_exp_wts, _cols_dict = clsMPT.get_weighted_mpt(
    data=_mcap_log_ror,
    cols_dict=_cols,
#     date_col='mcap_date',
#     val_col='log_ror',
#     name_col='asset_name',
    topN=3,
    size=5,
    **_kwargs,
)
# print("Dates: %s" % str(_wr_dates))
# print("Data : row=%d columns=%d" % (_wr_data.count(),len(_wr_data.columns)))
_l_exp_wts

[Stage 109:>                                                        (0 + 1) / 1]

23/01/28 09:49:03 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/28 09:49:08 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 115:>                                                        (0 + 1) / 1]

23/01/28 09:49:13 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 127:>                                                        (0 + 1) / 1]

23/01/28 09:51:38 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/28 09:51:43 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 133:>                                                        (0 + 1) / 1]

23/01/28 09:51:48 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 137:>                                                        (0 + 1) / 1]

23/01/28 09:51:50 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 151:>                                                        (0 + 1) / 1]

23/01/28 09:53:24 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/28 09:53:28 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 157:>                                                        (0 + 1) / 1]

23/01/28 09:53:32 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 161:>                                                        (0 + 1) / 1]

23/01/28 09:53:34 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 175:>                                                        (0 + 1) / 1]

23/01/28 09:55:48 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/28 09:55:52 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 181:>                                                        (0 + 1) / 1]

23/01/28 09:55:57 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 193:>                                                        (0 + 1) / 1]

23/01/28 09:57:54 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/28 09:57:59 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 199:>                                                        (0 + 1) / 1]

23/01/28 09:58:05 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 203:>                                                        (0 + 1) / 1]

23/01/28 09:58:08 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


                                                                                

23/01/28 09:58:08 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 213:>                                                        (0 + 1) / 1]

23/01/28 09:58:11 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 222:>                                                        (0 + 1) / 1]

23/01/28 10:00:47 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/28 10:00:51 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 228:>                                                        (0 + 1) / 1]

23/01/28 10:00:55 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB
23/01/28 10:00:57 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 246:>                                                        (0 + 1) / 1]

23/01/28 10:02:05 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/28 10:02:10 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 252:>                                                        (0 + 1) / 1]

23/01/28 10:02:17 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 256:>                                                        (0 + 1) / 1]

23/01/28 10:02:19 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 270:>                                                        (0 + 1) / 1]

23/01/28 10:03:16 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/28 10:03:21 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 276:>                                                        (0 + 1) / 1]

23/01/28 10:03:26 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 280:>                                                        (0 + 1) / 1]

23/01/28 10:03:27 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 294:>                                                        (0 + 1) / 1]

23/01/28 10:04:23 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/28 10:04:28 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 300:>                                                        (0 + 1) / 1]

23/01/28 10:04:34 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 304:>                                                        (0 + 1) / 1]

23/01/28 10:04:36 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 318:>                                                        (0 + 1) / 1]

23/01/28 10:06:24 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/28 10:06:31 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 328:>                                                        (0 + 1) / 1]

23/01/28 10:06:40 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB
23/01/28 10:06:43 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 342:>                                                        (0 + 1) / 1]

23/01/28 10:08:59 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/28 10:09:04 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 348:>                                                        (0 + 1) / 1]

23/01/28 10:09:09 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 352:>                                                        (0 + 1) / 1]

23/01/28 10:09:12 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


                                                                                

23/01/28 10:09:13 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 362:>                                                        (0 + 1) / 1]

23/01/28 10:09:16 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 371:>                                                        (0 + 1) / 1]

23/01/28 10:10:55 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/28 10:10:59 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 377:>                                                        (0 + 1) / 1]

23/01/28 10:11:04 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 381:>                                                        (0 + 1) / 1]

23/01/28 10:11:06 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


                                                                                

23/01/28 10:11:07 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 391:>                                                        (0 + 1) / 1]

23/01/28 10:11:09 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 400:>                                                        (0 + 1) / 1]

23/01/28 10:12:26 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/28 10:12:30 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 406:>                                                        (0 + 1) / 1]

23/01/28 10:12:34 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB
23/01/28 10:12:36 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 415:>                                                        (0 + 1) / 1]                                                                                

23/01/28 10:12:37 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB
23/01/28 10:12:38 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 429:>                                                        (0 + 1) / 1]

23/01/28 10:13:49 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/28 10:13:53 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 435:>                                                        (0 + 1) / 1]

23/01/28 10:13:57 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB
23/01/28 10:14:00 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


                                                                                

23/01/28 10:14:00 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 449:>                                                        (0 + 1) / 1]

23/01/28 10:14:03 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 458:>                                                        (0 + 1) / 1]

23/01/28 10:15:22 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/28 10:15:26 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 464:>                                                        (0 + 1) / 1]

23/01/28 10:15:31 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 468:>                                                        (0 + 1) / 1]

23/01/28 10:15:33 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 473:>                                                        (0 + 1) / 1]                                                                                

23/01/28 10:15:34 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 478:>                                                        (0 + 1) / 1]

23/01/28 10:15:36 WARN DAGScheduler: Broadcasting large task binary with size 4.4 MiB


[Stage 484:>                                                        (0 + 1) / 1]                                                                                

[{'mcap_date': datetime.date(2022, 1, 6),
  'asset_name': ['charg_coin', 'roge', 'neurotoken'],
  'log_ror': [0.25040658289005097, 0.2300847277541936, 0.22315158781277547],
  'weights': [0.5934458610550608, 0.11493828956433183, 0.2916158493806074],
  'mcap_value': [Decimal('5220183.74772765300000000000'),
   Decimal('2145261.20389485800000000000'),
   Decimal('1332808.07022394360000000000')]},
 {'mcap_date': datetime.date(2022, 1, 7),
  'asset_name': ['robotina', 'sentinel_group', 'gravity_finance'],
  'log_ror': [0.9659091703444093, 0.415784053356342, 0.24974754544640954],
  'weights': [0.6295957738354031, 0.05237806329355537, 0.3180261628710416],
  'mcap_value': [Decimal('1018345.62651913000000000000'),
   Decimal('5870017.17622237100000000000'),
   Decimal('2031961.70641057660000000000')]},
 {'mcap_date': datetime.date(2022, 1, 8),
  'asset_name': ['primecoin', 'neurochain', 'bitcoin_scrypt'],
  'log_ror': [0.45615688364912843, 0.4011424725185939, 0.28212772816557075],
  'weights': 

## Write MPT to MongoDB

* Collection name = "mpt."+date(YYYY-MM-DD)
* document structure: \_id, date, asset, mcap.value, mcap.weight, mcap.ror

In [20]:
# Destination settings for the MPT documents.
_kwargs = {
    "DESTINDBNAME": 'tip-daily-mpt',
    "COLLPREFIX": 'mpt.for'
}
# NOTE(review): `clsMPT`, `_l_exp_wts` and `_cols_dict` come from the
# deprecated weighted-portfolio cell; that cell must have run first.
mpt_list_ = clsMPT.write_mpt_to_db(
    mpt_data=_l_exp_wts,
    cols_dict=_cols_dict,
    # Pass the settings defined above. The previous `**kwargs` referred to a
    # name this notebook never defines, so the destination settings were not
    # the ones being forwarded.
    **_kwargs,
)
print("Upsert %d documents" % len(mpt_list_))

Upsert 5 documents
