In [2]:
import nasdaqdatalink as nd
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, Float, String, inspect
from sqlalchemy.exc import SQLAlchemyError
import pandas as pd
import os
import gzip
import shutil
from concurrent.futures import ThreadPoolExecutor, as_completed
import logging
import time
from dotenv import load_dotenv
import pickle
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
load_dotenv()

True

In [3]:
NASDAQ_API = os.environ.get('NASDAQ_API')
nd.ApiConfig.api_key = NASDAQ_API

In [4]:
MYSQL_HOST = os.getenv("MYSQL_HOST")
MYSQL_USER = os.getenv("MYSQL_USER")
MYSQL_PASSWORD = os.getenv("MYSQL_PASSWORD")
MYSQL_DB = os.getenv("MYSQL_DB")


DATABASE_URL = f"mysql+mysqlconnector://{MYSQL_USER}:{MYSQL_PASSWORD}@{MYSQL_HOST}/{MYSQL_DB}"
engine = create_engine(DATABASE_URL)

In [5]:
MYSQL_DB

'nasdaqdata'

In [6]:
def save_to_csv(dataframe, file_path):
    dataframe.to_csv(file_path, index=False)

def compress_file(file_path):
    with open(file_path, 'rb') as f_in:
        with gzip.open(file_path + '.gz', 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
    os.remove(file_path)



def download_data(table, **filters):
    try:
        data = nd.get_table(table, **filters)
        return table, data
    except Exception as e:
        print(f"Error downloading data for {table}: {str(e)}")
        return table, None

def save_to_db(dataframe, table_name, engine):
    try:
        # metadata = MetaData()
        # metadata.reflect(bind=engine)
        table_name = table_name.replace('/', '_').lower()
        dataframe.to_sql(table_name, engine, if_exists='replace', index=False)
        print(f"Data saved to {table_name} table successfully.")
    except SQLAlchemyError as e:
        print(f"Error saving {table_name}: {str(e)}")

# @task(retries=3)
def process_data(key):
    try:
        df = nd.get_table(key).head()
        date_guess = [col for col in df.columns if 'date' in col.lower()][0]
        table, data = download_data(key, paginate = True,  **{date_guess: {'gte': '2024-03-15'}})
        save_to_db(data, key)
    except:
        table, data = download_data(key, paginate = True)
        save_to_db(data, key)


In [21]:

tables_filters = {
    # 'QDL/ODA': {},
    # 'QDL/FON': {},
    # 'QDL/OPEC': {},
    # 'QDL/JODI': {},
    # 'QDL/BITFINEX': {},
    # 'QDL/BCHAIN': {},
    # 'QDL/LME': {},
    # 'ZILLOW/DATA': {},
    # 'WASDE/DATA': {},
    # 'WB/DATA': {}
    'DY/FDEQU' :{}
}
# filters = {
#     # 'date': '2019-10-08', 
#     # 'contract_code': '967654'
# }
# data = download_data(table, **filters)

In [7]:
# data.head()

In [8]:
# save_to_db(data, 'QDL_FON')

In [22]:
download_all_tables(tables_filters)

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): data.nasdaq.com:443
DEBUG:urllib3.connectionpool:https://data.nasdaq.com:443 "GET /api/v3/datatables/DY/FDEQU HTTP/1.1" 200 None


Data saved to dy_fdequ table successfully.
Total time taken: 4.75 seconds


In [19]:
query = "SELECT * FROM DY_FDEQU "
# DY/FDEQU
retrieved_data = retrieve_data(query, engine)
retrieved_data.head()

Unnamed: 0,ticker_symbol,exchange_code,equity_type,security_name,exchange_country_code,transaction_currency_code,listing_status,listing_date,delisting_date,last_updated
0,1,XSHE,A,"Ping An Bank Co., Ltd.",CHN,CNY,L,1991-04-03,,2017-03-17
1,2,XSHE,A,"China Vanke Co., Ltd.",CHN,CNY,L,1991-01-29,,2019-08-20
2,9,XSHE,A,"China Baoan Group Co.,Ltd.",CHN,CNY,L,1991-06-25,,2017-04-28
3,651,XSHE,A,"Gree Electric Appliances,Inc. of Zhuhai",CHN,CNY,L,1996-11-18,,2019-08-20
4,776,XSHE,A,"GF Securities Co., Ltd.",CHN,CNY,L,1997-06-11,,2020-09-02


In [23]:
retrieved_data.columns

Index(['ticker_symbol', 'exchange_code', 'equity_type', 'security_name',
       'exchange_country_code', 'transaction_currency_code', 'listing_status',
       'listing_date', 'delisting_date', 'last_updated'],
      dtype='object')

In [11]:
query = "SELECT * FROM ZILLOW_DATA"
retrieved_data = retrieve_data(query, engine)
retrieved_data.head()

Unnamed: 0,indicator_id,region_id,date,value
0,ZSFH,99999,2024-04-30,471998.338021
1,ZSFH,99999,2024-03-31,470671.863494
2,ZSFH,99999,2024-02-29,471144.364833
3,ZSFH,99999,2024-01-31,474834.034632
4,ZSFH,99999,2023-12-31,478670.136789


In [12]:
current_dir = os.path.abspath('')
pkl_file_path = os.path.join(current_dir, '..', 'data', 'info_dict.pkl')


with open(pkl_file_path, 'rb') as f:
    data = pickle.load(f)

print(data)

{'NDAQ/RTAT10': {'url': ['https://data.nasdaq.com/databases/RTAT'], 'vendor_code': 'NDAQ', 'datatable_code': 'RTAT10', 'name': 'Retail Trading Activity Tracker - Daily Top 10 (Free)', 'description': None, 'columns': [{'name': 'date', 'type': 'Date'}, {'name': 'ticker', 'type': 'text'}, {'name': 'activity', 'type': 'double'}, {'name': 'sentiment', 'type': 'Integer'}], 'filters': ['ticker', 'date'], 'primary_key': ['date', 'ticker'], 'premium': False, 'status': {'refreshed_at': datetime.datetime(2024, 6, 15, 3, 12, 50, tzinfo=tzutc()), 'status': 'ON TIME', 'expected_at': '00 10 * * 2-6', 'update_frequency': '10am every Tuesday through Saturday'}, 'data_version': {'code': '1', 'default': True, 'description': None}, 'Type': ['Prices & Volumes']}, 'NDAQ/RTAT': {'url': ['https://data.nasdaq.com/databases/RTAT'], 'vendor_code': 'NDAQ', 'datatable_code': 'RTAT', 'name': 'Retail Trading Activity Tracker - Daily Full Universe (Premium)', 'description': None, 'columns': [{'name': 'date', 'type': 

In [13]:
data

{'NDAQ/RTAT10': {'url': ['https://data.nasdaq.com/databases/RTAT'],
  'vendor_code': 'NDAQ',
  'datatable_code': 'RTAT10',
  'name': 'Retail Trading Activity Tracker - Daily Top 10 (Free)',
  'description': None,
  'columns': [{'name': 'date', 'type': 'Date'},
   {'name': 'ticker', 'type': 'text'},
   {'name': 'activity', 'type': 'double'},
   {'name': 'sentiment', 'type': 'Integer'}],
  'filters': ['ticker', 'date'],
  'primary_key': ['date', 'ticker'],
  'premium': False,
  'status': {'refreshed_at': datetime.datetime(2024, 6, 15, 3, 12, 50, tzinfo=tzutc()),
   'status': 'ON TIME',
   'expected_at': '00 10 * * 2-6',
   'update_frequency': '10am every Tuesday through Saturday'},
  'data_version': {'code': '1', 'default': True, 'description': None},
  'Type': ['Prices & Volumes']},
 'NDAQ/RTAT': {'url': ['https://data.nasdaq.com/databases/RTAT'],
  'vendor_code': 'NDAQ',
  'datatable_code': 'RTAT',
  'name': 'Retail Trading Activity Tracker - Daily Full Universe (Premium)',
  'descript

In [14]:
print('{')
for i in data:
    key = i.strip()
    if value:
        value = i['status']['expected_at'].strip()

    print(f'"{key}" : "{value}",')
print('}')


{


NameError: name 'value' is not defined

In [None]:
def get_next_run_time(cron_expression):
    base = datetime.now()
    iter = croniter(cron_expression, base)
    return iter.get_next(datetime)

In [16]:
# TEST

In [18]:
# def test_download_data():
#     table = 'QDL/FON'
#     filters = {'date': '2019-10-08', 'contract_code': '967654'}
#     data = download_data(table, **filters)
#     assert not data.empty, "Downloaded data is empty"

# test_download_data()

In [28]:
def test_save_to_db():
    table = 'QDL/FON'
    filters = {'date': '2019-10-08', 'contract_code': '967654'}
    data = download_data(table, **filters)
    save_to_db(data, 'test_table')
    # Further assertions to verify the data saved to the database

test_save_to_db()

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): data.nasdaq.com:443
DEBUG:urllib3.connectionpool:https://data.nasdaq.com:443 "GET /api/v3/datatables/QDL/FON?date=2019-10-08&contract_code=967654 HTTP/1.1" 200 None


Data saved to test_table table successfully.


In [29]:
def test_retrieve_data():
    query = "SELECT * FROM test_table WHERE date='2019-10-08'"
    data = retrieve_data(query)
    assert not data.empty, "Retrieved data is empty"

test_retrieve_data()

In [27]:
# nd.Database('WIKI').bulk_download_to_file('/data')


In [26]:
# db = nd.Database('WIKI')
# db.database_code

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): data.nasdaq.com:443
DEBUG:urllib3.connectionpool:https://data.nasdaq.com:443 "GET /api/v3/databases/WIKI HTTP/1.1" 200 None


'WIKI'

In [21]:
# dt = nd.Datatable('ZACKS/FC')

In [24]:
# dt.data_fields()

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): data.nasdaq.com:443
DEBUG:urllib3.connectionpool:https://data.nasdaq.com:443 "GET /api/v3/datatables/ZACKS/FC/metadata HTTP/1.1" 200 None


['vendor_code',
 'datatable_code',
 'name',
 'description',
 'columns',
 'filters',
 'primary_key',
 'premium',
 'status',
 'data_version']

In [30]:
# database = nd.Database('WIKI')
# database.data_fields()


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): data.nasdaq.com:443
DEBUG:urllib3.connectionpool:https://data.nasdaq.com:443 "GET /api/v3/databases/WIKI HTTP/1.1" 200 None


['id',
 'name',
 'database_code',
 'description',
 'datasets_count',
 'downloads',
 'premium',
 'image',
 'favorite',
 'url_name',
 'exclusive']

In [31]:
# databases = nd.Datatable('RATE')
# databases.values

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): data.nasdaq.com:443
DEBUG:urllib3.connectionpool:https://data.nasdaq.com:443 "GET /api/v3/databases HTTP/1.1" 410 None


DataLinkError: (Status 410) Something went wrong. Please try again. If you continue to have problems, please contact us at connect@data.nasdaq.com.