# Document the Data Sources
* [Coingecko python code](https://github.com/man-c/pycoingecko)

In [1]:
'''
    WARNING CONTROL to display or ignore all warnings
'''
import warnings; warnings.simplefilter('ignore')     #switch betweeb 'default' and 'ignore'
import traceback

''' Set debug flag to view extended error messages; else set it to False to turn off debugging mode '''
debug = True

## Initialize classes

In [80]:
import os
import sys
import configparser
from datetime import datetime, date, timedelta

sys.path.insert(1,"/home/nuwan/workspace/rezaware/")
import rezaware as reza
from wrangler.modules.assets.etl import cryptoMCExtractor as mcap
from utils.modules.etl.load import sparkDBwls as spark
from utils.modules.etl.load import sparkNoSQLwls as nosql

''' restart initiate classes '''
if debug:
    import importlib
    reza = importlib.reload(reza)
    mcap = importlib.reload(mcap)
    nosql = importlib.reload(nosql)
    spark = importlib.reload(spark)
    
__desc__ = "get crypto macket capitalization data"
clsNoSQL = nosql.NoSQLWorkLoads(desc=__desc__)
clsSpark = spark.SQLWorkLoads(desc=__desc__)
''' optional - if not specified class will use the default values '''
# prop_kwargs = {"WRITE_TO_TMP":True,   # necessary to emulate the etl dag
#               }
clsMC = mcap.CryptoMarkets(desc=__desc__)
print("\nClass initialization and load complete!")

All functional APP-libraries in REZAWARE-package of REZAWARE-module imported successfully!
All assets-module etl-packages in function-CryptoMarket imported successfully!
All functional SPARKNOSQLWLS-libraries in LOAD-package of ETL-module imported successfully!
All functional SPARKDBWLS-libraries in LOAD-package of ETL-module imported successfully!
sparkNoSQLwls Class initialization complete
sparkNoSQLwls Class initialization complete
CryptoMarket Class initialization complete

Class initialization and load complete!


## Transform collections into Spark DataFrame

In [84]:
# _data_owner='coingecko'
__SOURCEDBNAME__ = 'tip-historic-marketcap'
__DESTINDBNAME__ = 'tip'
__DESTINTABLE__ = 'mcap_past'

_from_dt=date(2022,1,5)
_to_dt=date(2022,1,10)
_kwargs = {
#     "SOURCEDBNAME":'tip-historic-marketcap',
    "DESTINTBLNAME":'mcap_past',
    "DBAUTHSOURCE":'tip-historic-marketcap',
#     "COLLLIST":['coingecko.2022-04-01.btc','coingecko.2022-04-01.etc'],
#     "COLLLIST":['coingecko.2022-07-01.axial'],
   "HASINNAME":'coingecko.2021-01-01',
#     "FIND":{'symbol':{'$eq':'btc'}},   # use the find key to define a filter
    "COLUMNSMAP":{'_id':'uuid',
                  'id':'asset_name',
                 'source':'data_source',
                 'symbol':'asset_symbol',
                 'date':'mcap_date',
                 'marketcap':'mcap_value'
                }
}
_data = clsMC.nosql_to_sql(
    source_db=__SOURCEDBNAME__,
    coll_list=[],
    destin_db=__DESTINDBNAME__,
    table_name=__DESTINTABLE__,
    **_kwargs,
)

Validating input parameters ...
[Error]function <nosql_to_sql> Unable to locate any collection in tip-historic-marketcap database
Traceback (most recent call last):
  File "/home/nuwan/workspace/rezaware/wrangler/modules/assets/etl/cryptoMCExtractor.py", line 969, in nosql_to_sql
    raise ValueError("Unable to locate any collection in %s database"
ValueError: Unable to locate any collection in tip-historic-marketcap database



In [66]:
print(_data.shape)
_data

(91, 5)


Unnamed: 0,uuid,data_source,asset_symbol,mcap_date,mcap_value
0,639044199d51d344925149ef,coingecko,axial,2022-07-01,178105.122969
1,639044199d51d344925149f0,coingecko,axial,2022-07-02,176269.472145
2,639044199d51d344925149f1,coingecko,axial,2022-07-03,177148.740073
3,639044199d51d344925149f2,coingecko,axial,2022-07-04,176486.251422
4,639044199d51d344925149f3,coingecko,axial,2022-07-05,189309.136926
...,...,...,...,...,...
86,639044199d51d34492514a45,coingecko,axial,2022-09-25,95285.571493
87,639044199d51d34492514a46,coingecko,axial,2022-09-26,93002.035327
88,639044199d51d34492514a47,coingecko,axial,2022-09-27,93749.528665
89,639044199d51d34492514a48,coingecko,axial,2022-09-28,93534.185996


## Spark MongoDB experiment

In [55]:
print(clsNoSQL.dbType,
      clsNoSQL.dbHostIP,
      clsNoSQL.dbPort,
      clsNoSQL.dbFormat,
      clsNoSQL.dbName,
      clsNoSQL.dbUser,
      clsNoSQL.dbPswd,
      clsNoSQL.dbAuthSource,
      clsNoSQL.dbAuthMechanism,
      clsNoSQL.sparkMaster,
     )

mongodb 127.0.0.1 27017 mongo tip-asset-metadata farmraider spirittribe tip-asset-metadata SCRAM-SHA-256 local[1]


In [51]:
from pyspark.sql import SparkSession
_appName = " ".join(
    [clsNoSQL.__app__,
     clsNoSQL.__name__,
     clsNoSQL.__package__,
     clsNoSQL.__module__
    ])   #"PySpark MongoDB Examples"
_master = clsNoSQL.sparkMaster  # "local[1]"
_h_ip = clsNoSQL.dbHostIP #"127.0.0.1"
_type = clsNoSQL.dbType   # mongodb
_port = clsNoSQL.dbPort   # 27017
_format = clsNoSQL.dbFormat # mongo
_user = clsNoSQL.dbUser   # "farmraider"
_pswd = clsNoSQL.dbPswd   # "spirittribe"
_auth = clsNoSQL.dbAuthSource   #"tip-historic-marketcap"
_db = clsNoSQL.dbName     # "tip-historic-marketcap"
_coll = 'coingecko.2022-07-01.btc'

_inp_uri = f"{_type}://{_user}:{_pswd}@{_h_ip}/{_db}.{_coll}?authSource={_auth}"
_out_uri = f"{_type}://{_user}:{_pswd}@{_h_ip}/{_db}.{_coll}?authSource={_auth}"

# Create Spark session
spark = SparkSession.builder \
    .appName(_appName) \
    .master(_master) \
    .config("spark.mongodb.input.uri", _inp_uri) \
    .config("spark.mongodb.output.uri", _out_uri) \
    .getOrCreate()

# Read data from MongoDB
df = spark.read.format(_format).load()
df.printSchema()
df.show()

22/12/16 15:15:19 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.
root
 |-- _id: struct (nullable = true)
 |    |-- oid: string (nullable = true)
 |-- date: timestamp (nullable = true)
 |-- marketcap: double (nullable = true)
 |-- source: string (nullable = true)
 |-- symbol: string (nullable = true)

+--------------------+-------------------+--------------------+---------+------+
|                 _id|               date|           marketcap|   source|symbol|
+--------------------+-------------------+--------------------+---------+------+
|{6390a3bf9d51d344...|2022-07-01 16:00:00|3.659842243234527...|coingecko|   btc|
|{6390a3bf9d51d344...|2022-07-02 16:00:00|3.709165299187163E11|coingecko|   btc|
|{6390a3bf9d51d344...|2022-07-03 16:00:00| 3.67706689137568E11|coingecko|   btc|
|{6390a3bf9d51d344...|2022-07-04 16:00:00|3.685493641350522...|coingecko|   btc|
|{6390a3bf9d51d344...|2022-07-05 16:00:00|3.864251762627784...|coingecko|   