# Read and Clean MCap with PySpark

In [1]:
# WARNING CONTROL: display or ignore all warnings.
# Switch the filter action between 'default' (show) and 'ignore' (hide).
import warnings
import traceback

warnings.simplefilter('ignore')

# Debug flag: True shows extended error messages; set it to False to turn
# off debugging mode. Later cells also use it to decide whether to reload
# the project modules.
debug = True

In [209]:
import os
import sys
from datetime import datetime, date, timedelta

sys.path.insert(1,"/home/nuwan/workspace/rezaware/")
import rezaware as reza
from utils.modules.etl.load import sparkDBwls as sdb
from utils.modules.etl.transform import sparkCleanNRich as scne
from mining.modules.assets.etp import logReturns as log
from utils.modules.ml.timeseries import rollingstats as stats

# In debug mode, reload the project modules (in the same order as before)
# and rebind their aliases, then re-create the class instances below.
if debug:
    import importlib
    reza, log, sdb, scne, stats = [
        importlib.reload(_module) for _module in (reza, log, sdb, scne, stats)
    ]

__desc__ = "analyze crypto market capitalization time series data"

# One instance per workload class, each created with the same description.
# clsSDB = sdb.SQLWorkLoads(desc=__desc__)
clsSCNR = scne.Transformer(desc=__desc__)
clsROR = log.RatioOfReturns(desc=__desc__)
clsStat = stats.RollingStats(desc=__desc__)

# Optional - if not specified the class will use the default values.
# prop_kwargs = {"WRITE_TO_TMP":True,   # necessary to emulate the etl dag
#               }
print("\nClass initialization and load complete!")

INFO:transform:########################################################
INFO:transform:sparkCleanNRich transform
DEBUG:transform:utils initialization for etl module package transform sparkCleanNRich done.
Start workloads: analyze crypto market capitalization time series data.
INFO:etp:########################################################
INFO:etp:logReturns Class
INFO:load:########################################################
INFO:load:sparkdbwls load
DEBUG:load:utils initialization for etl module package load sparkdbwls done.
Start workloads: analyze crypto market capitalization time series data.
INFO:transform:########################################################
INFO:transform:sparkCleanNRich transform
DEBUG:transform:utils initialization for etl module package transform sparkCleanNRich done.
Start workloads: analyze crypto market capitalization time series data.
INFO:load:########################################################
INFO:load:sparkNoSQLwls Class
DEBUG:load:util

All functional APP-libraries in REZAWARE-package of REZAWARE-module imported successfully!
All functional LOGRETURNS-libraries in ETP-package of ASSETS-module imported successfully!
All functional SPARKDBWLS-libraries in LOAD-package of ETL-module imported successfully!
All functional SPARKCLEANNRICH-libraries in TRANSFORM-package of ETL-module imported successfully!
All packages in utils ml timeseries RollingStats imported successfully!
sparkNoSQLwls Class initialization complete
logReturns Class initialization complete

Class initialization and load complete!


## Read data from mcap_past
We query `warehouse.mcap_past` for assets whose market capitalization (mcap) is greater than 1.0 million within the selected date range. Any missing values are imputed with the mean value.

In [202]:
# Date window for the extract; SQL BETWEEN makes both ends inclusive.
_from_date = "2022-01-01"
_to_date = "2022-01-05"

# Select the rows inside the date window whose mcap_value exceeds 1,000,000.
_query = (
    "select * from warehouse.mcap_past "
    f"where mcap_date between '{_from_date}' and '{_to_date}' "
    f"and mcap_value > 1000000"
)

# Keyword settings forwarded to read_n_clean_mcap together with the query.
_kwargs = {
    "TABLENAME": "warehouse.mcap_past",
    "COLUMN": "mcap_date",
    "FROMDATETIME": _from_date,
    "TODATETIME": _to_date,
    "PARTITIONS": 2,
    "AGGREGATE": "avg",
    "PIVCOLUMNS": [
        "cofix",
        "paypolitan-token",
        "raven-protocol",
        "nft-index",
        "beldex",
        "mt-pelerin-shares",
    ],
}

mcap_sdf = clsROR.read_n_clean_mcap(query=_query, **_kwargs)

# Report the size of the returned dataframe.
print(
    "Loaded %d rows and %d columns"
    % (mcap_sdf.count(), len(mcap_sdf.columns))
)

DEBUG:load:@setter Spark PARTIONS set to: 2
DEBUG:load:@property Database dbType set to: postgresql
DEBUG:load:@property Database dbHostIP set to: 127.0.0.1
DEBUG:load:@property Database Port set to: 5432
DEBUG:load:@property Database dbName set to: tip
DEBUG:load:@property Database dbConnURL set to: jdbc:postgresql://127.0.0.1:5432/tip
DEBUG:load:@property Database dbUser set to: farmraider
DEBUG:load:@property Database dbPswd set to: spirittribe
DEBUG:load:@property Spark homeDir set to: /opt/spark/
DEBUG:load:@property Spark appName set to: utils etl load sparkdbwls
DEBUG:load:@property Spark config set to: spark.jars
DEBUG:load:@property Spark jarDir set to: /opt/spark/jars/postgresql-42.5.0.jar
DEBUG:load:@property Spark master set to: local[1]
DEBUG:load:function <@property session> importing SparkSession library from spark dir: /opt/spark/


Wait a moment, retrieving data ...


DEBUG:load:Non-type spark session set with homeDir: /opt/spark/ appName: utils etl load sparkdbwls conf: spark.jars jarDir: /opt/spark/jars/postgresql-42.5.0.jar master: local[1]
DEBUG:load:@property Spark rwFormat set to: jdbc
DEBUG:load:loaded 190825 rows into pyspark dataframe                            
DEBUG:etp:function <read_n_clean_mcap> loaded 190825 rows                       
DEBUG:transform:Pivot columns: ['cofix', 'beldex', 'dxsale_network', 'fydcoin', 'bitscreener', 'lixir_protocol', 'auction', 'dough', 'onooks', 'raydium', 'zclassic', 'balancer', 'algorand', 'etna_network', 'liquidus', 'coin', 'yield_yak', 'derivadao', 'mdex', 'utrust', 'ryo', 'cook', 'steem_dollars', 'mask_network', 'sail', 'moonlana', 'catapult', 'seedify_fund', 'secret_finance', 'dego_finance', 'mixmarvel', 'alchemix', 'enq_enecuum', 'yield_guild_games', 'rio_defi', 'deeponion', 'denarius', 'iteration_syndicate', 'add_xyz_new', 'badger_dao', 'sense', 'konomi_network', 'cnns', 'gny', 'unidex', 'carvert

DEBUG:transform:Transposing 190825 rows groupby MCAP_DATE to pivot with distinct values in ASSET_NAME and AVG aggregation on column(s): mcap_value
[Stage 3432:>                                                       (0 + 1) / 1]

23/01/26 23:21:58 WARN DAGScheduler: Broadcasting large task binary with size 3.3 MiB


[Stage 3435:>                                                       (0 + 1) / 1]

23/01/26 23:22:16 WARN DAGScheduler: Broadcasting large task binary with size 6.3 MiB


DEBUG:etp:function <impute_wrapper> ran an impute on all 1679 asset tickers     
DEBUG:transform:NULL count completed for 1679 columns                           
[Stage 3446:>                                                       (0 + 1) / 1]

23/01/26 23:25:09 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 3456:>                                                       (0 + 1) / 1]

23/01/26 23:26:28 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


DEBUG:etp:After unpivot, dataframe with rows 8395 columns 3                     
[Stage 3466:>                                                       (0 + 1) / 1]

23/01/26 23:27:45 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 3469:>                                                       (0 + 1) / 1]

Loaded 8395 rows and 3 columns


                                                                                

## Compute LogROR for all assets

In [203]:
# Column-name settings forwarded to the log rate-of-return computation.
kwargs = {
    "PREVALCOLNAME": "mcap_prev_val",
    "DIFFCOLNAME": "mcap_diff",
    "LOGCOLNAME": "log_ror",
}
_mcap_log_ror, _log_col = clsROR.get_log_ror(
    data=mcap_sdf,
    part_column="asset_name",
    num_col_name="mcap_value",
    **kwargs,
)

# Preview three rows whose log_ror is not null.
_mcap_log_ror.where(_mcap_log_ror.log_ror.isNotNull()).show(n=3)

[Stage 3476:>                                                       (0 + 1) / 1]

23/01/26 23:29:16 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 3486:>                                                       (0 + 1) / 1]

23/01/26 23:30:51 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 3496:>                                                       (0 + 1) / 1]

23/01/26 23:32:08 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/26 23:32:15 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 3502:>                                                       (0 + 1) / 1]

23/01/26 23:32:19 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3506:>                                                       (0 + 1) / 1]

23/01/26 23:32:20 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3511:>                                                       (0 + 1) / 1]

+----------+----------+--------------------+--------------------+--------------------+--------------------+
| mcap_date|asset_name|          mcap_value| mcap_value_prev_val|           mcap_diff|             log_ror|
+----------+----------+--------------------+--------------------+--------------------+--------------------+
|2022-01-02|  switcheo|25959201.75722492...|11336695.15087185...|14622506.60635306...|-0.35980487434341657|
|2022-01-02| primecoin|4869520.968668153...|2382648.070384798...|2486872.898283354...| -0.3104265594101802|
|2022-01-02| cypherium|11824476.85075602...|6272519.311846281...|5551957.538909744...| -0.2753399906925486|
+----------+----------+--------------------+--------------------+--------------------+--------------------+
only showing top 3 rows



                                                                                

## Weighted Portfolio

In [204]:
# Column names passed to get_weighted_mpt as cols_dict.
_cols = {
    "NAMECOLUMN": "asset_name",
    "DATECOLUMN": "mcap_date",
    "RORCOLUMN": "log_ror",
    "MCAPCOLUMN": "mcap_value",
    "WEIGHTCOLUMN": "weights",
}

# NOTE(review): **_kwargs forwards the settings dict built for the database
# read (TABLENAME, COLUMN, ...) - confirm get_weighted_mpt expects them.
_l_exp_wts, _cols_dict = clsROR.get_weighted_mpt(
    data=_mcap_log_ror,
    cols_dict=_cols,
    topN=3,
    size=5,
    **_kwargs,
)

# Leave the result as the last expression so the notebook displays it.
_l_exp_wts

[Stage 3514:>                                                       (0 + 1) / 1]

23/01/26 23:49:22 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/26 23:49:26 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 3520:>                                                       (0 + 1) / 1]

23/01/26 23:49:30 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3532:>                                                       (0 + 1) / 1]

23/01/26 23:50:47 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/26 23:50:52 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 3538:>                                                       (0 + 1) / 1]

23/01/26 23:50:57 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3542:>                                                       (0 + 1) / 1]

23/01/26 23:50:59 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3556:>                                                       (0 + 1) / 1]

23/01/26 23:52:14 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/26 23:52:18 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 3562:>                                                       (0 + 1) / 1]

23/01/26 23:52:23 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3566:>                                                       (0 + 1) / 1]

23/01/26 23:52:25 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3580:>                                                       (0 + 1) / 1]

23/01/26 23:53:38 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/26 23:53:42 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 3586:>                                                       (0 + 1) / 1]

23/01/26 23:53:46 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


DEBUG:etp:Retrieved a valid dataframe with 6716 of 8395 rows from orignal dataframe
[Stage 3598:>                                                       (0 + 1) / 1]

23/01/26 23:54:59 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/26 23:55:03 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 3604:>                                                       (0 + 1) / 1]

23/01/26 23:55:08 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3608:>                                                       (0 + 1) / 1]

23/01/26 23:55:11 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3613:>                                                       (0 + 1) / 1]                                                                                

23/01/26 23:55:11 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3618:>                                                       (0 + 1) / 1]

23/01/26 23:55:14 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3627:>                                                       (0 + 1) / 1]

23/01/26 23:56:25 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/26 23:56:29 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 3633:>                                                       (0 + 1) / 1]

23/01/26 23:56:33 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB
23/01/26 23:56:34 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3651:>                                                       (0 + 1) / 1]

23/01/26 23:57:47 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/26 23:57:50 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 3657:>                                                       (0 + 1) / 1]

23/01/26 23:57:55 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3661:>                                                       (0 + 1) / 1]

23/01/26 23:57:56 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3675:>                                                       (0 + 1) / 1]

23/01/26 23:59:08 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/26 23:59:11 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 3681:>                                                       (0 + 1) / 1]

23/01/26 23:59:16 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3685:>                                                       (0 + 1) / 1]

23/01/26 23:59:17 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3699:>                                                       (0 + 1) / 1]

23/01/27 00:00:30 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/27 00:00:34 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 3705:>                                                       (0 + 1) / 1]

23/01/27 00:00:39 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3709:>                                                       (0 + 1) / 1]

23/01/27 00:00:40 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


DEBUG:etp:Proceeding with data for [datetime.date(2022, 1, 2), datetime.date(2022, 1, 3), datetime.date(2022, 1, 4), datetime.date(2022, 1, 5)] dates
[Stage 3723:>                                                       (0 + 1) / 1]

23/01/27 00:01:55 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/27 00:01:58 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 3729:>                                                       (0 + 1) / 1]

23/01/27 00:02:03 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3733:>                                                       (0 + 1) / 1]

23/01/27 00:02:05 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3738:>                                                       (0 + 1) / 1]                                                                                

23/01/27 00:02:05 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3743:>                                                       (0 + 1) / 1]

23/01/27 00:02:07 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


DEBUG:etp:Retrieved 1679 asset data rows for 2022-01-02                         
DEBUG:etp:3 rows for 3 topN retrieved assets: ['eco_value_coin' 'defit' 'bata'] 
DEBUG:etp:Generated random weights with dimensions 2
DEBUG:etp:[[0.2455405  0.66494154 0.08951796]
 [0.77045533 0.22529512 0.00424955]
 [0.46083621 0.46490044 0.07426335]
 [0.7347999  0.21780459 0.04739551]
 [0.08588294 0.08394898 0.83016808]]
DEBUG:etp:Multiplied array generated weighted returns with 2 dimensions
DEBUG:etp:[[0.08713239 0.17175906 0.02106942]
 [0.27340343 0.05819531 0.0010002 ]
 [0.16353213 0.12008704 0.01747902]
 [0.26075075 0.05626045 0.01115526]
 [0.03047638 0.02168461 0.19539276]]
DEBUG:etp:Sum of each randomized weighted portfolio with 1 dimensions [0.27996087 0.33259893 0.30109818 0.32816647 0.24755375]
DEBUG:etp:Maximum weighted row index 1
[Stage 3752:>                                                       (0 + 1) / 1]

23/01/27 00:03:23 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/27 00:03:26 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 3758:>                                                       (0 + 1) / 1]

23/01/27 00:03:30 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3762:>                                                       (0 + 1) / 1]

23/01/27 00:03:33 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3767:>                                                       (0 + 1) / 1]                                                                                

23/01/27 00:03:33 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB
23/01/27 00:03:35 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


DEBUG:etp:Retrieved 1679 asset data rows for 2022-01-03                         
DEBUG:etp:3 rows for 3 topN retrieved assets: ['bitcoinpos' 'cypherium' 'merge'] 
DEBUG:etp:Generated random weights with dimensions 2
DEBUG:etp:[[0.20151767 0.67979804 0.11868429]
 [0.0023878  0.38619926 0.61141294]
 [0.28757203 0.40095235 0.31147562]
 [0.05957886 0.07831058 0.86211056]
 [0.27693719 0.57578193 0.14728087]]
DEBUG:etp:Multiplied array generated weighted returns with 2 dimensions
DEBUG:etp:[[0.10811883 0.19743827 0.02780693]
 [0.00128111 0.11216642 0.14324992]
 [0.15428896 0.11645126 0.07297663]
 [0.03196542 0.02274426 0.20198668]
 [0.14858313 0.16722818 0.03450691]]
DEBUG:etp:Sum of each randomized weighted portfolio with 1 dimensions [0.33336402 0.25669745 0.34371685 0.25669636 0.35031822]
DEBUG:etp:Maximum weighted row index 4
[Stage 3781:>                                                       (0 + 1) / 1]

23/01/27 00:04:47 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/27 00:04:50 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 3787:>                                                       (0 + 1) / 1]

23/01/27 00:04:55 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3791:>                                                       (0 + 1) / 1]

23/01/27 00:04:57 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3796:>                                                       (0 + 1) / 1]                                                                                

23/01/27 00:04:58 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3801:>                                                       (0 + 1) / 1]

23/01/27 00:05:00 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


DEBUG:etp:Retrieved 1679 asset data rows for 2022-01-04                         
DEBUG:etp:3 rows for 3 topN retrieved assets: ['primecoin' 'freicoin' 'the_corgi_of_polkabridge'] 
DEBUG:etp:Generated random weights with dimensions 2
DEBUG:etp:[[0.35942933 0.61637532 0.02419535]
 [0.10149592 0.33758528 0.56091881]
 [0.21404052 0.45931416 0.32664532]
 [0.53524858 0.26246625 0.20228517]
 [0.40847781 0.57028678 0.02123541]]
DEBUG:etp:Multiplied array generated weighted returns with 2 dimensions
DEBUG:etp:[[0.06019566 0.0783117  0.00306519]
 [0.0169981  0.04289087 0.07106012]
 [0.03584658 0.05835677 0.04138113]
 [0.08964111 0.03334685 0.02562654]
 [0.06841009 0.07245606 0.00269021]]
DEBUG:etp:Sum of each randomized weighted portfolio with 1 dimensions [0.14157255 0.13094909 0.13558448 0.1486145  0.14355636]
DEBUG:etp:Maximum weighted row index 3
[Stage 3810:>                                                       (0 + 1) / 1]

23/01/27 00:06:12 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


                                                                                

23/01/27 00:06:16 WARN DAGScheduler: Broadcasting large task binary with size 3.2 MiB


[Stage 3816:>                                                       (0 + 1) / 1]

23/01/27 00:06:20 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3820:>                                                       (0 + 1) / 1]

23/01/27 00:06:22 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


                                                                                

23/01/27 00:06:22 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


[Stage 3830:>                                                       (0 + 1) / 1]

23/01/27 00:06:24 WARN DAGScheduler: Broadcasting large task binary with size 4.5 MiB


DEBUG:etp:Retrieved 1679 asset data rows for 2022-01-05                         
DEBUG:etp:3 rows for 3 topN retrieved assets: ['switcheo' 'cypherium' 'bata'] 
DEBUG:etp:Generated random weights with dimensions 2
DEBUG:etp:[[0.07823604 0.0363523  0.88541167]
 [0.03241786 0.55106731 0.41651483]
 [0.49546224 0.14343015 0.36110761]
 [0.48265701 0.06727906 0.45006393]
 [0.23179664 0.4741772  0.29402616]]
DEBUG:etp:Multiplied array generated weighted returns with 2 dimensions
DEBUG:etp:[[0.06965087 0.0105107  0.19963038]
 [0.02886051 0.15933249 0.09391   ]
 [0.44109308 0.04147058 0.08141755]
 [0.42969302 0.01945269 0.10147419]
 [0.20636062 0.13710092 0.06629295]]
DEBUG:etp:Sum of each randomized weighted portfolio with 1 dimensions [0.27979194 0.282103   0.56398121 0.5506199  0.40975449]
DEBUG:etp:Maximum weighted row index 2
INFO:etp:Completed maximized weights for 4 portfolio(s)


[{'mcap_date': datetime.date(2022, 1, 2),
  'asset_name': ['eco_value_coin', 'defit', 'bata'],
  'log_ror': [0.3548595414839115, 0.25830700717589633, 0.23536530334128117],
  'weights': [0.7704553297762768, 0.22529511751785014, 0.004249552705873011],
  'mcap_value': [Decimal('6236804.35301152300000000000'),
   Decimal('1000748.52084980000000000000'),
   Decimal('2048489.12741518840000000000')]},
 {'mcap_date': datetime.date(2022, 1, 3),
  'asset_name': ['bitcoinpos', 'cypherium', 'merge'],
  'log_ror': [0.536522839064536, 0.29043664872179714, 0.2342932451898627],
  'weights': [0.27693719094463143, 0.5757819347894347, 0.14728087426593373],
  'mcap_value': [Decimal('7820145.33051354000000000000'),
   Decimal('6058222.00732905600000000000'),
   Decimal('1557677.11527762590000000000')]},
 {'mcap_date': datetime.date(2022, 1, 4),
  'asset_name': ['primecoin', 'freicoin', 'the_corgi_of_polkabridge'],
  'log_ror': [0.16747566087979177, 0.12705196679285938, 0.1266852098744721],
  'weights': [0.

## Write MPT to MongoDB

* Collection name = "mpt."+date(YYYY-MM-DD)
* document structure: \_id, date, asset, mcap.value, mcap.weight, mcap.ror

In [206]:
# Print the MPT documents grouped by their 'date' value.
# NOTE(review): mpt_list_ is assigned by the write_mpt_to_db cell further
# down, so that cell has to run first; this code assumes mpt_list_ is a flat
# list of dicts - TODO confirm against what write_mpt_to_db returns.
_uniq_dates = {_doc['date'] for _doc in mpt_list_}
for _date in _uniq_dates:
    _mpt_for_date = [_doc for _doc in mpt_list_ if _doc['date'] == _date]
    print("\n", _mpt_for_date)


 [{'date': '2022-01-04T00:00:00', 'asset': 'primecoin', 'mcap.weight': 0.5352485801987441, 'mcap.ror': 0.16747566087979177, 'mcap.value': 3666988.725026001, '_id': ObjectId('63d2a5ac429b0bf378f27909')}, {'date': '2022-01-04T00:00:00', 'asset': 'freicoin', 'mcap.weight': 0.2624662456667546, 'mcap.ror': 0.12705196679285938, 'mcap.value': 1167534.4669345282, '_id': ObjectId('63d2a5ac429b0bf378f2790a')}, {'date': '2022-01-04T00:00:00', 'asset': 'the_corgi_of_polkabridge', 'mcap.weight': 0.20228517413450128, 'mcap.ror': 0.1266852098744721, 'mcap.value': 1030406.954412614, '_id': ObjectId('63d2a5ac429b0bf378f2790b')}]

 [{'date': '2022-01-03T00:00:00', 'asset': 'bitcoinpos', 'mcap.weight': 0.27693719094463143, 'mcap.ror': 0.536522839064536, 'mcap.value': 7820145.33051354, '_id': ObjectId('63d2a5ac429b0bf378f27906')}, {'date': '2022-01-03T00:00:00', 'asset': 'cypherium', 'mcap.weight': 0.5757819347894347, 'mcap.ror': 0.29043664872179714, 'mcap.value': 6058222.007329056, '_id': ObjectId('63d2

In [210]:
# Destination settings for the MongoDB write: target database name and the
# collection-name prefix.
_kwargs = {
    "DESTINDBNAME": 'tip-daily-mpt',
    "COLLPREFIX": 'mpt.for',
}

# Forward the settings defined above. The previous code unpacked `kwargs`,
# a dict left over from a different cell, so DESTINDBNAME and COLLPREFIX
# were never handed to write_mpt_to_db.
mpt_list_ = clsROR.write_mpt_to_db(
    mpt_data=_l_exp_wts,
    cols_dict=_cols_dict,
    **_kwargs,
)
print(mpt_list_)

DEBUG:etp:Created database collection ready dict list with 12 documents
DEBUG:load:MongoClient(host=['127.0.0.1:27017'], document_class=dict, tz_aware=False, connect=True, authsource='tip-daily-mpt', authmechanism='SCRAM-SHA-256')
DEBUG:load:Writing document to mongodb
INFO:load:Total 3 documents, successful insert count = 0 & modify count = 3
DEBUG:etp:3 documents written to mpt.2022-01-04 collection
DEBUG:load:Writing document to mongodb
INFO:load:Created a new collection Collection(Database(MongoClient(host=['127.0.0.1:27017'], document_class=dict, tz_aware=False, connect=True, authsource='tip-daily-mpt', authmechanism='SCRAM-SHA-256'), 'tip-daily-mpt'), 'mpt.2022-01-03')
INFO:load:Inserted 3 documents
DEBUG:etp:3 documents written to mpt.2022-01-03 collection
DEBUG:load:Writing document to mongodb
INFO:load:Created a new collection Collection(Database(MongoClient(host=['127.0.0.1:27017'], document_class=dict, tz_aware=False, connect=True, authsource='tip-daily-mpt', authmechanism='

Total 3 documents, successful insert count = 0 & modify count = 3
[[{'date': '2022-01-04T00:00:00', 'asset': 'primecoin', 'mcap.weight': 0.5352485801987441, 'mcap.ror': 0.16747566087979177, 'mcap.value': 3666988.725026001}, {'date': '2022-01-04T00:00:00', 'asset': 'freicoin', 'mcap.weight': 0.2624662456667546, 'mcap.ror': 0.12705196679285938, 'mcap.value': 1167534.4669345282}, {'date': '2022-01-04T00:00:00', 'asset': 'the_corgi_of_polkabridge', 'mcap.weight': 0.20228517413450128, 'mcap.ror': 0.1266852098744721, 'mcap.value': 1030406.954412614}], [{'date': '2022-01-03T00:00:00', 'asset': 'bitcoinpos', 'mcap.weight': 0.27693719094463143, 'mcap.ror': 0.536522839064536, 'mcap.value': 7820145.33051354, '_id': ObjectId('63d346a3429b0bf378f27914')}, {'date': '2022-01-03T00:00:00', 'asset': 'cypherium', 'mcap.weight': 0.5757819347894347, 'mcap.ror': 0.29043664872179714, 'mcap.value': 6058222.007329056, '_id': ObjectId('63d346a3429b0bf378f27915')}, {'date': '2022-01-03T00:00:00', 'asset': 'merg

## Simple Moving Average (SMA)

In [62]:
# Rolling-window settings: date/time attribute plus window length and unit.
kwargs = {
    "DATETIMEATTR": "mcap_date",
    "WINLENGTH": 7,
    "WINUNIT": "DAY",
}

# NOTE(review): the log-ROR preview earlier in this notebook lists the columns
# 'mcap_diff' and 'log_ror' but no 'diff' - confirm the column name before
# re-running this cell.
_sma_sdf = clsStat.simple_moving_stats(
    data=_mcap_log_ror,  # data set
    column="diff",  # column name to apply the rolling computation
    stat_op="mean",  # stat operation sum, mean or standard deviation
    **kwargs,
)


In [63]:
# Print the leading rows of the rolling-stats dataframe computed above.
_sma_sdf.show()

23/01/18 16:08:49 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
23/01/18 16:08:49 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
23/01/18 16:08:49 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.


[Stage 289:>                                                        (0 + 1) / 1]

23/01/18 16:09:19 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
23/01/18 16:09:19 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.


[Stage 291:>                                                        (0 + 1) / 1]

23/01/18 16:09:21 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
23/01/18 16:09:21 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
23/01/18 16:09:22 WARN DAGScheduler: Broadcasting large task binary with size 3.3 MiB


[Stage 294:>                                                        (0 + 1) / 1]

23/01/18 16:09:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
23/01/18 16:09:27 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
23/01/18 16:09:29 WARN DAGScheduler: Broadcasting large task binary with size 4.7 MiB


[Stage 298:>                                                        (0 + 1) / 1]

23/01/18 16:09:30 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
23/01/18 16:09:30 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.
23/01/18 16:09:32 WARN DAGScheduler: Broadcasting large task binary with size 4.7 MiB


[Stage 303:>                                                        (0 + 1) / 1]

+-------------------+------------------+--------------------+------------------+--------------------+--------------------+
|          mcap_date|        asset_name|          mcap_value|         mcap_diff|                diff|   rolling_mean_diff|
+-------------------+------------------+--------------------+------------------+--------------------+--------------------+
|2022-01-01 00:00:00|            0chain|18581739.78078547...| 7.290301186807742|-930233.240037870...|66178035.76083997...|
|2022-01-01 00:00:00|              1_up|6580366.707037671...|   6.8616358287375|-691331.205561129...|66178035.76083997...|
|2022-01-01 00:00:00|              1art|20450189.55773175...| 7.363364636916094|-2636658.08537944...|66178035.76083997...|
|2022-01-01 00:00:00|             1inch|1025227565.328557...| 8.979442079049766|71461032.82505580...|66178035.76083997...|
|2022-01-01 00:00:00|            1world|2254182.045747336...| 6.312876221139709|198877.3184413719...|66178035.76083997...|
|2022-01-01 00:0

                                                                                