In [2]:
from utils.common import *

# Get Data

In [3]:
import urllib.request
import json


def retrieve(url):
    """Fetch ``url`` and return its body parsed as JSON.

    The response bytes are decoded as UTF-8 before parsing. Exceptions raised
    by ``urllib.request.urlopen`` or ``json.loads`` propagate to the caller.
    """
    with urllib.request.urlopen(url) as response:
        raw_body = response.read()
    return json.loads(raw_body.decode("utf-8"))

In [4]:
def query_chart_data(begin_datetime, end_datetime, pair, period):
    """Request chart data for ``pair`` from the Poloniex public API.

    ``begin_datetime`` and ``end_datetime`` are converted with ``.timestamp()``
    and sent as the ``start`` / ``end`` query parameters; ``pair`` and
    ``period`` are interpolated into the URL unchanged. Returns whatever
    ``retrieve`` yields for that URL.
    """
    begin = begin_datetime.timestamp()
    end = end_datetime.timestamp()
    return retrieve(
        f'https://poloniex.com/public?command=returnChartData&currencyPair={pair}&start={begin}&end={end}&period={period}'
    )


def query_trade_history(begin_datetime, end_datetime, pair):
    """Request the trade history for ``pair`` from the Poloniex public API.

    ``begin_datetime`` and ``end_datetime`` are converted with ``.timestamp()``
    and sent as the ``start`` / ``end`` query parameters. Returns whatever
    ``retrieve`` yields for that URL.
    """
    # Bind both bounds: the previous `begin, begin = ...` left `end` undefined
    # and stored the end timestamp in `begin`.
    begin, end = begin_datetime.timestamp(), end_datetime.timestamp()
    url = f'https://poloniex.com/public?command=returnTradeHistory&currencyPair={pair}&start={begin}&end={end}'
    return retrieve(url)

In [5]:
from dateutil.rrule import rrule, YEARLY, MONTHLY
from dateutil.relativedelta import relativedelta
from time import sleep


def retrieve_long_window(begin_time, end_time, pair, period):
    """Download chart data for a long span in windows of at most one year.

    ``begin_time`` and ``end_time`` are converted with
    ``str_time_to_datetime``; one ``query_chart_data`` request is issued per
    yearly step between them and the results are concatenated into one list.

    Raises:
        RuntimeError: if a request returns a dict (the API's error shape)
            instead of a list of candles.
    """
    begin, end = str_time_to_datetime(begin_time, end_time)
    data = []
    for window_start in rrule(YEARLY, dtstart=begin, until=end):
        # Clamp the final window so the request never runs past `end`.
        window_end = min(window_start + relativedelta(years=1), end)
        chunk = query_chart_data(window_start, window_end, pair, period)
        if isinstance(chunk, dict):
            # `list += dict` would silently append the dict's keys instead.
            raise RuntimeError(
                f'Unexpected response for {pair} {window_start} - {window_end}: {chunk}'
            )
        data += chunk
        sleep(1)  # pause between requests (presumably to respect API rate limits)

    return data

In [6]:
def rename_date_to_timestamp(df):
    """Return a copy of ``df`` with the ``date`` column renamed to ``timestamp``.

    The input frame is left untouched; frames without a ``date`` column come
    back with their columns unchanged.
    """
    column_mapping = {'date': 'timestamp'}
    return df.rename(columns=column_mapping)

In [7]:
def poloniex_data_to_df(data):
    """Turn raw chart records into a de-duplicated DataFrame.

    The ``date`` field is renamed to ``timestamp``, rows whose timestamp is
    not positive are dropped, and a ``date`` column is derived from
    ``timestamp`` via ``timestamp_to_date``. Fully identical rows are removed.
    """
    df = pd.DataFrame(data)
    # .copy() detaches the filtered rows from the source frame so that adding
    # a column below does not raise SettingWithCopyWarning.
    df = rename_date_to_timestamp(df).query('timestamp > 0').copy()
    df['date'] = df['timestamp'].apply(timestamp_to_date)
    return df.drop_duplicates()

# Download Chart Data

In [13]:
from sqlite3 import connect

def create_table(table_name, conn):
    """Create the chart-data table ``table_name`` on ``conn`` if it is missing.

    The table is keyed on ``(timestamp, period)``. ``table_name`` is
    interpolated directly into the statement, so it must be a trusted
    identifier.
    """
    ddl = f"""
    CREATE TABLE IF NOT EXISTS {table_name} (
            timestamp       BIGINT,
            period          STRING,
            open            DOUBLE,
            high            DOUBLE,
            low             DOUBLE,
            close           DOUBLE,
            volume          DOUBLE,
            quoteVolume     DOUBLE,
            weightedAverage DOUBLE,
            date            DATETIME,
            PRIMARY KEY (
                timestamp,
                period
            )
        );
    """
    conn.execute(ddl)
    

def write_sql(df, db, table_name):
    """Append to ``table_name`` the rows of ``df`` whose key is not stored yet.

    Opens the SQLite database ``db``, creates the table when missing, reads
    the rows already stored and inserts only those rows of ``df`` whose
    ``(timestamp, period)`` key does not occur among them. The connection is
    always closed before returning.
    """
    import pandas as pd  # local import so the function does not rely on a star import

    key_cols = ['timestamp', 'period']
    conn = connect(db)
    try:
        create_table(table_name, conn)
        old = read_table(table_name, db, conn)
        # A key counted exactly once across stored + incoming rows belongs to
        # only one side; the inner merge with `df` then keeps the keys that
        # are new. pd.concat replaces DataFrame.append (removed in pandas 2.0).
        keys = (
            pd.concat([old, df], sort=True)
            .groupby(key_cols)
            .count()
            .query('close == 1')
            .reset_index()[key_cols]
        )
        not_in_db = df.merge(keys, on=key_cols, how='inner')
        not_in_db.to_sql(table_name, conn, if_exists='append', index=False)
        conn.commit()
    finally:
        # Release the database handle even when reading or inserting fails.
        conn.close()

In [14]:
def retrieve_and_save(pair, start_time, end_time, interval, interval_str,
                      db_root="D:\\Dropbox\\My work\\krypl-project\\sqlite\\ploniex-chart-data"):
    """Download chart data for ``pair`` and store it in ``<db_root>/<pair>.db``.

    Args:
        pair: currency pair name, also used as the database file name.
        start_time, end_time: bounds passed to ``retrieve_long_window``.
        interval: candle period forwarded to the API query.
        interval_str: label written to the ``period`` column of every row.
        db_root: directory holding the per-pair SQLite files. Defaults to the
            location that used to be hard-coded here (spelling of the
            directory name kept as-is).
    """
    import os

    data = retrieve_long_window(start_time, end_time, pair, interval)
    df = poloniex_data_to_df(data)
    df['period'] = interval_str

    db = os.path.join(db_root, f"{pair}.db")
    write_sql(df, db, 'chart_data')

In [15]:
# Time span and candle size for the chart-data download.
start_time, end_time = '2014-01-02 00:00:00', '2018-09-16 23:59:59'
# `interval` is the candle period in seconds sent to the API. The label stored
# in the `period` column is derived from it so the two cannot disagree
# (1800 s used to be labelled '5min').
# NOTE(review): rows already stored under the old '5min' label hold 1800 s
# candles - confirm and relabel them if needed.
interval = 1800
interval_str = f'{interval // 60}min'
pairs = [
    'USDT_BTC',
#     'BTC_ETH',
#     'BTC_LTC',
#     'BTC_XRP',
#     'BTC_XMR',
#     'BTC_STR'
]

In [16]:
# Download and store chart data for every configured pair, printing
# "<pair><TAB>done" as each one finishes.
for pair in pairs:
    print(f'{pair}\t', end='')
    retrieve_and_save(pair, start_time, end_time, interval, interval_str)
    print('done')

USDT_BTC	done


# Download Trades

In [None]:
import datetime
import calendar


def add_months(sourcedate, months=1):
    """Return midnight of the day ``months`` calendar months after ``sourcedate``.

    The day of month is clamped to the last day of the target month, so
    31 January plus one month gives 28 (or 29) February. Only the year, month
    and day of ``sourcedate`` are used: any time-of-day is discarded.
    """
    year_shift, month_index = divmod(sourcedate.month - 1 + months, 12)
    year = sourcedate.year + year_shift
    month = month_index + 1
    last_day_of_month = calendar.monthrange(year, month)[1]
    return datetime.datetime(year, month, min(sourcedate.day, last_day_of_month))

In [None]:
# Pair and time span used by the trade-history download cells below.
pair = 'USDT_BTC'
start_time = '2017-10-01 00:00:00'
end_time = '2018-05-19 23:59:59'
beginDT, endDT = str_time_to_datetime(start_time, end_time)

In [None]:
# Directory that receives the monthly trade files for `pair`.
data_root = 'D:\\Dropbox\\My work\\krypl-project\\'
# data_root already ends with a separator, so none is added before "data"
# (the previous form produced a doubled backslash in the path).
path = f'{data_root}data\\poloniex\\trades\\{pair}'

In [None]:
import sys
from datetime import timedelta

# Download the trade history month by month and write one TSV file per month.
# Within a month the window end `t` is moved back to the oldest trade received
# so far, and the request is repeated until a response holds fewer than
# 50000 rows (presumably the API's per-request cap).
for dt in rrule(MONTHLY, dtstart=beginDT, until=endDT):
    f, t = dt, add_months(dt)
    # Collect the pages in a list and concatenate once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    month_chunks = []
    while True:
        sys.stdout.write('\r' + str(f) + ' - ' + str(t))
        sys.stdout.flush()
        trades = query_trade_history(f, t, pair)

        if trades == {'error': 'Please specify a time window of no more than 1 month.'}:
            # Window too long for the API: shrink it from the start and retry.
            f += timedelta(days=1)
            continue

        if isinstance(trades, dict) and 'error' in trades:
            # Any other API error: fail with the API's message instead of an
            # obscure DataFrame construction error.
            raise RuntimeError(f"Poloniex returned an error for {f} - {t}: {trades['error']}")

        trades = pd.DataFrame(trades)
        if trades.empty:
            # No trades in this window: nothing to page through.
            break

        month_chunks.append(trades)
        # NOTE(review): `strTimeToDatetime` and `writeTsv` are not defined in
        # this notebook; presumably they come from `utils.common` - confirm
        # they still exist under these camelCase names.
        t = strTimeToDatetime(trades.date.min())
        f = dt
        if len(trades) < 50000:
            break

    if not month_chunks:
        # Nothing was traded in this month, so there is no file to write.
        continue

    month = f'{dt.month:02d}'
    file_name = '{year}_{month}.tsv'.format(year=dt.year, month=month)
    month_trades = pd.concat(month_chunks).drop_duplicates().sort_values('date')
    writeTsv(month_trades, path + '\\' + file_name)