-
Notifications
You must be signed in to change notification settings - Fork 0
/
getBTCData.py
85 lines (80 loc) · 3.11 KB
/
getBTCData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/python
import requests
from datetime import datetime, timedelta
import re
from dateutil import tz
from dateutil.parser import parse
import os
# endpoint for historical price
REQUEST = "https://api.coinbase.com/v1/prices/historical"
### exchange API
# endpoint for historical market volume and price
MARKET_REQUEST = "https://api.exchange.coinbase.com/products/BTC-USD/candles"
# market data starting date (parsed with DT_FORMAT)
INIT = '2014-01-01T00:00:00'
# number of days in each request
BUCKET_SIZE = 100
# number of days to obtain
DATA_SIZE = 600
# number of requests needed to cover DATA_SIZE days in BUCKET_SIZE-day buckets
### DATA_SIZE must be an exact multiple of BUCKET_SIZE
# floor division keeps the count an integer on Python 3 as well as Python 2
REQUEST_SIZE = DATA_SIZE // BUCKET_SIZE
# interval of each entry in bucket (86400 is the number of seconds in one day)
GRANULARITY = 86400
# datetime format for parsing
DT_FORMAT = "%Y-%m-%dT%H:%M:%S"
# output datetime format (date only)
OUT_FORMAT = "%Y-%m-%d"
# timezone objects
# to_zone is UTC; from_zone is the local timezone of the machine running the script
to_zone = tz.tzutc()
from_zone = tz.tzlocal()
# output file names (paths are relative to the current working directory)
MARKET_FILE = './data/market.csv'
PRICE_FILE = './data/price.csv'
def price():
    """Download historical prices page by page and write them to PRICE_FILE.

    Requests pages 1 through 119 of the REQUEST endpoint.  The body of every
    page answered with HTTP 200 is read as newline-separated ``time,price``
    text; each timestamp is converted to UTC, formatted with DT_FORMAT and
    written to PRICE_FILE as one ``time,price`` line.  Pages answered with
    any other status are reported on stdout and skipped.

    PRICE_FILE is truncated on every run, and its parent directory is
    created when missing.
    """
    # open() does not create missing directories, so make sure the target exists
    out_dir = os.path.dirname(PRICE_FILE)
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    utc = tz.tzutc()
    with open(PRICE_FILE, "w+") as f:
        for page in range(1, 120):
            print("page %d..." % page)
            # a timeout keeps one stalled connection from hanging the whole run
            r = requests.get(REQUEST, params={'page': page}, timeout=30)
            if r.status_code != 200:
                print("page %d skipped: HTTP %d" % (page, r.status_code))
                continue
            for line in r.text.splitlines():
                line = line.strip()
                if not line:
                    # an empty line (e.g. after a trailing newline) cannot be
                    # unpacked into two fields
                    continue
                timestamp, value = line.split(',')
                # convert time to utc timezone
                utc_time = parse(timestamp).astimezone(utc)
                f.write("%s,%s\n" % (utc_time.strftime(DT_FORMAT), value))
# volume and price data from Exchange API
# the data represents the exchanged volume and the close price for each day
def marketData():
    """Append per-bucket candle data from MARKET_REQUEST to MARKET_FILE.

    Starting at INIT, issues five consecutive requests, each covering
    BUCKET_SIZE days (the end bound is one second before the next bucket
    starts) at GRANULARITY resolution.  For every response answered with
    HTTP 200, each bracketed entry is written as ``date,field,field`` where
    the date is the entry's first field interpreted as a Unix timestamp in
    UTC and formatted with OUT_FORMAT, and the two fields are the last two
    values of the entry (close price and volume according to the comment
    above — confirm against the API documentation).  Other statuses are
    reported on stdout and skipped.

    MARKET_FILE is opened in append mode, so repeated runs add to the
    existing contents; its parent directory is created when missing.
    """
    # open() does not create missing directories, so make sure the target exists
    out_dir = os.path.dirname(MARKET_FILE)
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # initial start time
    init = datetime.strptime(INIT, DT_FORMAT)
    # initialize bucket size
    bucket = timedelta(days=BUCKET_SIZE)
    with open(MARKET_FILE, "a+") as marketf:
        param = {'granularity': GRANULARITY}
        # NOTE(review): the request count is hard-coded to 5 although
        # DATA_SIZE / BUCKET_SIZE (REQUEST_SIZE) is 6 — confirm which is intended
        for i in range(5):
            # construct proper bucket: [start, start + bucket - 1 second]
            start_datetime = init + bucket * i
            end_datetime = start_datetime + bucket - timedelta(seconds=1)
            param['start'] = start_datetime.strftime(DT_FORMAT)
            param['end'] = end_datetime.strftime(DT_FORMAT)
            # make request
            print("querying data of %s..." % param['start'])
            # a timeout keeps one stalled connection from hanging the whole run
            r = requests.get(MARKET_REQUEST, params=param, timeout=30)
            if r.status_code != 200:
                print("request for %s skipped: HTTP %d"
                      % (param['start'], r.status_code))
                continue
            # find all valid entries from string
            # NOTE(review): only entries made of digits, '.' and ',' match;
            # an entry containing whitespace, a sign or an exponent is dropped
            matches = re.findall(r'\[([\d.,]+?)\]', r.text)
            # entries are written in the reverse of the response order
            # (presumably the API lists newest first — confirm)
            for entry in reversed([m.split(',') for m in matches]):
                # build the UTC datetime directly from the timestamp instead
                # of going through the machine's local timezone
                bucket_datetime = datetime.fromtimestamp(int(entry[0]), to_zone)
                bucket_t = bucket_datetime.strftime(OUT_FORMAT)
                # write data to file
                marketf.write("%s,%s,%s\n" % (bucket_t, entry[-2], entry[-1]))
# script entry point: only the historical price download runs by default
if __name__ == '__main__':
    price()
    # the market data download is disabled; uncomment the call below to run it
    #marketData()