In [1]:
import numpy as np
import pandas as pd

from utils.common import *



# Get Data

In [2]:
import urllib.request
import json

def retrieve(url):
    """Fetch `url` and return its body parsed as JSON.

    The response bytes are decoded as UTF-8 before being handed to the JSON
    parser. Errors raised by urllib while opening or reading the URL, and
    decoding/parsing errors, propagate to the caller unchanged.
    """
    with urllib.request.urlopen(url) as response:
        raw_body = response.read()
    # The response is closed at this point; what remains is pure parsing.
    return json.loads(raw_body.decode("utf-8"))

In [8]:
def query_chart_data(beginDatetime, endDatetime, currencyPair, period):
    """Query Poloniex's public `returnChartData` command for one time window.

    Parameters
    ----------
    beginDatetime, endDatetime : datetime.datetime
        Bounds of the window. They are converted with ``.timestamp()``, so the
        values are sent as float POSIX timestamps (naive datetimes are
        interpreted in the local timezone by ``datetime.timestamp``).
    currencyPair : str
        Market identifier inserted verbatim into the URL, e.g. 'USDT_BTC'.
    period : int
        Candle width inserted verbatim into the URL (this notebook passes 300
        together with the label '5min', so presumably seconds - confirm
        against the API documentation).

    Returns
    -------
    The decoded JSON payload returned by ``retrieve``.
    """
    beginTimestamp, endTimestamp = beginDatetime.timestamp(), endDatetime.timestamp()
    url = 'https://poloniex.com/public?command=returnChartData&currencyPair={pair}&start={begin}&end={end}&period={period}'
    # The `period` argument is forwarded here; the earlier misspelling `priod`
    # made every call fail with NameError before any request was sent.
    return retrieve(url.format(pair=currencyPair, begin=beginTimestamp, end=endTimestamp, period=period))


def query_trade_history(beginDatetime, endDatetime, currencyPair):
    """Query Poloniex's public `returnTradeHistory` command for one window.

    Both datetimes are converted with ``.timestamp()`` and inserted into the
    request URL together with `currencyPair`; the decoded JSON payload from
    ``retrieve`` is returned as-is.
    """
    endpoint = 'https://poloniex.com/public?command=returnTradeHistory&currencyPair={pair}&start={begin}&end={end}'
    request_url = endpoint.format(
        pair=currencyPair,
        begin=beginDatetime.timestamp(),
        end=endDatetime.timestamp(),
    )
    return retrieve(request_url)

In [16]:
from dateutil.rrule import rrule, YEARLY, MONTHLY
from dateutil.relativedelta import relativedelta
from time import sleep

def retrieveLongWindowPoloniex(beginTime, endTime, currencyPair, period):
    """Download chart data for a long time span in yearly slices.

    Parameters
    ----------
    beginTime, endTime : str
        Window bounds as strings; parsed with ``strTimeToDatetime``.
    currencyPair, period
        Forwarded unchanged to ``queryPoloniex`` for every slice.

    Returns
    -------
    list
        The per-slice results concatenated in chronological slice order.
        Consecutive slices share their boundary instant, so the caller is
        expected to de-duplicate.
    """
    beginDT = strTimeToDatetime(beginTime)
    endDT = strTimeToDatetime(endTime)
    data = []
    for dt in rrule(YEARLY, dtstart=beginDT, until=endDT):
        # Clamp the slice so the final request does not reach past endTime
        # (previously the last slice always extended a full year).
        dt2 = min(dt + relativedelta(years=1), endDT)
        if dt2 <= dt:
            # The slice start landed exactly on endTime: nothing left to fetch.
            break
        # NOTE(review): `queryPoloniex` is not defined in the visible notebook;
        # it presumably comes from `utils.common` or is the former name of
        # `query_chart_data` - confirm before a fresh-kernel run.
        data += queryPoloniex(dt, dt2, currencyPair, period)
        # Pause between requests to stay gentle on the public API.
        sleep(1)

    return data
        

In [5]:
def renameDateToTimestamp(df):
    """Return a copy of `df` whose 'date' column is relabelled 'timestamp'.

    Frames without a 'date' column come back with their columns unchanged.
    """
    column_renames = {'date': 'timestamp'}
    return df.rename(columns=column_renames)

In [6]:
def poloniexDataToDf(data):
    """Build a de-duplicated DataFrame from raw Poloniex records.

    The incoming 'date' column is renamed to 'timestamp', and a new 'date'
    column is derived from it by applying ``timestampToDate`` to every value.
    Fully identical rows are dropped before returning.
    """
    renamed = renameDateToTimestamp(pd.DataFrame(data))
    with_dates = renamed.assign(date=renamed['timestamp'].apply(timestampToDate))
    return with_dates.drop_duplicates()

# Download Chart Data

In [7]:
def retrieveAndSave(cPair, startDateTime, endDateTime, interval, intervalStr,
                    dataRoot='D:\\Dropbox\\My work\\krypl-project\\'):
    """Download chart data for one currency pair and write it to a TSV file.

    Parameters
    ----------
    cPair : str
        Currency pair, used both for the query and in the file name.
    startDateTime, endDateTime : str
        Window bounds; the first 10 characters of each are used as the date
        part of the file name.
    interval
        Candle period forwarded to ``retrieveLongWindowPoloniex``.
    intervalStr : str
        Label for the period, used as sub-directory and in the file name.
    dataRoot : str, optional
        Root directory of the project data. Defaults to the location that was
        previously hard-coded, so existing calls write to the same path.

    The file is written via ``writeTsv`` to
    ``{dataRoot}\\data\\poloniex\\{intervalStr}\\{cPair}_{intervalStr}_{startDate}_{endDate}.tsv``.
    """
    startDate, endDate = startDateTime[:10], endDateTime[:10]
    data = retrieveLongWindowPoloniex(startDateTime, endDateTime, cPair, interval)
    df = poloniexDataToDf(data)
    # Explicit format arguments instead of **locals(): renaming or adding a
    # local can no longer silently change what ends up in the path.
    fileName = '{cPair}_{intervalStr}_{startDate}_{endDate}.tsv'.format(
        cPair=cPair, intervalStr=intervalStr, startDate=startDate, endDate=endDate)
    path = '{dataRoot}\\data\\poloniex\\{intervalStr}\\{fileName}'.format(
        dataRoot=dataRoot, intervalStr=intervalStr, fileName=fileName)
    writeTsv(df, path)

In [None]:
# Time span covered by the chart-data download.
startDateTime = '2014-01-01 00:00:00'
endDateTime = '2018-03-27 23:59:59'

# Candle period passed to the API and the label used in output paths.
interval = 300
intervalStr = '5min'

# Full set of pairs that can be downloaded ...
cPairs = [
    'USDT_BCH', 'BTC_BCH',
    'BTC_XEM',
    'USDT_STR', 'BTC_STR',
    'BTC_DASH',
    'USDT_ETC', 'BTC_ETC',
    'BTC_LSK',
]
# ... but this assignment overrides the list above, so only USDT_BTC is used.
cPairs = ['USDT_BTC']

In [None]:
# Download and store every configured pair; progress is echoed as "<pair>\tdone".
for cPair in cPairs:
    print(cPair, end='\t')
    retrieveAndSave(
        cPair=cPair,
        startDateTime=startDateTime,
        endDateTime=endDateTime,
        interval=interval,
        intervalStr=intervalStr,
    )
    print('done')

# Download Trades

In [38]:
import datetime
import calendar

def add_months(sourcedate, months=1):
    """Return `sourcedate` shifted by `months` calendar months.

    The day of month is kept when it exists in the target month and is
    otherwise clamped to that month's last day (e.g. Jan 31 -> Feb 28/29).
    Only year, month and day are carried over: the result is a
    ``datetime.datetime`` at midnight, whatever time `sourcedate` had.
    """
    # Work with a zero-based month index so divmod yields the year carry.
    year_shift, month_index = divmod(sourcedate.month - 1 + months, 12)
    target_year = sourcedate.year + year_shift
    target_month = month_index + 1
    last_day = calendar.monthrange(target_year, target_month)[1]
    return datetime.datetime(target_year, target_month, min(sourcedate.day, last_day))
    

In [64]:
# Market and time span for the trade-history download.
pair = 'USDT_BTC'
startDateTime = '2017-10-01 00:00:00'
endDateTime = '2018-05-19 23:59:59'

# Parsed bounds used by the monthly download loop.
beginDT, endDT = (strTimeToDatetime(stamp) for stamp in (startDateTime, endDateTime))

In [65]:
# Directory that receives one TSV file per month for the selected pair.
data_root = 'D:\\Dropbox\\My work\\krypl-project\\'
path = '{data_root}\\data\\poloniex\\trades\\{pair}'.format(
    data_root=data_root,
    pair=pair,
)

In [66]:
import sys
from datetime import timedelta

# Download the trade history month by month and write one TSV per month.
#
# Each month is fetched by paging backwards. This relies on the same assumption
# as before: a response holds at most 50000 trades and, when capped, they are
# the most recent ones in the window - TODO confirm against the API docs. After
# a full page the window end `t` is moved to the oldest trade received.
for dt in rrule(MONTHLY, dtstart=beginDT, until=endDT):
    f, t = dt, add_months(dt)
    # Pages are collected in a list and concatenated once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the accumulated frame on every call.
    chunks = []
    while True:
        sys.stdout.write('\r' + str(f) + ' - ' + str(t))
        sys.stdout.flush()
        trades = query_trade_history(f, t, pair)

        if isinstance(trades, dict) and 'error' in trades:
            if trades == {'error': 'Please specify a time window of no more than 1 month.'}:
                # Window too long for the API: shrink it from the start and retry.
                f += timedelta(days=1)
                continue
            # Any other API error used to surface as an opaque DataFrame
            # constructor failure; report it explicitly instead.
            raise RuntimeError('Poloniex returned an error: %s' % trades['error'])

        queried_from = f
        trades = pd.DataFrame(trades)
        if not trades.empty:
            chunks.append(trades)
        # Later requests start at the beginning of the month again.
        f = dt
        if len(trades) < 50000:
            if queried_from <= dt:
                # Short page on a window that reached the month start: done.
                break
            # Short page, but the window start had been shifted forward after
            # the "1 month" error, so [dt, queried_from] is still missing.
            # Fetch that remainder instead of silently dropping it.
            t = queried_from
        else:
            # Full page: continue with everything older than what was received.
            t = strTimeToDatetime(trades.date.min())

    if not chunks:
        # No trades at all in this month (an empty response used to crash on
        # `trades.date`); nothing to write.
        continue

    # Adjacent pages overlap at their boundary, hence the de-duplication.
    month_trades = pd.concat(chunks).drop_duplicates().sort_values('date')
    file_name = '{year}_{month:02d}.tsv'.format(year=dt.year, month=dt.month)
    writeTsv(month_trades, path + '\\' + file_name)

2018-05-01 00:00:00 - 2018-05-02 01:25:22

True