In [15]:
import datetime
from StringIO import StringIO

import pandas as pd

import time
import urllib
import urllib2

from BeautifulSoup import BeautifulSoup


def get(url, data_list=None, timeout=10, max_try=3):
    """
    Fetch a URL and return the raw response body, retrying on failure.

    :param url: address to request
    :param data_list: optional query parameters in any form ``urllib.urlencode``
        accepts (e.g. a list of ``(key, value)`` tuples); when given they are
        url-encoded and appended to ``url`` after a ``?``
    :param timeout: seconds passed to ``urlopen`` as its timeout; the same value
        is used as the pause between two consecutive attempts
    :param max_try: maximum number of attempts before giving up
    :return: the response body as returned by ``response.read()``
    :raises Exception: when no attempt succeeded
    """
    if data_list:
        url = "{}?{}".format(url, urllib.urlencode(data_list))
    query = urllib2.Request(url)
    current_try = 0
    while current_try < max_try:
        try:
            response = urllib2.urlopen(query, timeout=timeout)
            try:
                return response.read()
            finally:
                # Close even when read() fails so the connection is not leaked.
                response.close()
        except Exception as e:
            # Best-effort retry: report the failure and try again.
            print(e)
            current_try += 1
            # Only pause when another attempt follows; sleeping after the
            # last failure would just delay the error below.
            if current_try < max_try:
                time.sleep(timeout)
    raise Exception("Cannot open page {}".format(url))


def get_yahoo_finance_data(symbol, start_date=None, end_date=None, remove_zero_volume=True):
    """
    Download the price table for one symbol from the Yahoo Finance CSV endpoint.

    :param symbol: stock symbol used in yahoo finance, e.g. '0001.HK'
    :param start_date: optional start date as a 'YYYY-MM-DD' string, e.g. '2012-03-15'
    :param end_date: optional end date as a 'YYYY-MM-DD' string
    :param remove_zero_volume: if True, only rows with 'Volume' greater than zero are kept
    :return: a pandas DataFrame indexed by 'Date' (parsed to datetimes) holding the
        other columns of the downloaded CSV
    """
    data_list = [('s', symbol)]
    # The start date is sent under keys a/b/c and the end date under d/e/f,
    # each as (month, day, year).
    date_params = (
        (start_date, ('a', 'b', 'c')),
        (end_date, ('d', 'e', 'f')),
    )
    for date_text, (month_key, day_key, year_key) in date_params:
        if not date_text:
            continue
        parts = date_text.split('-')
        # The month is sent minus one (zero-based, presumably what the endpoint
        # expects); day and year are passed through as text.
        data_list.append((month_key, int(parts[1]) - 1))
        data_list.append((day_key, parts[2]))
        data_list.append((year_key, parts[0]))
    data_list.append(('g', 'd'))
    data_list.append(('ignore', '.csv'))

    # NOTE(review): verify this endpoint is still served before relying on it.
    url = "http://chart.finance.yahoo.com/table.csv"
    stock_info = get(url=url, data_list=data_list)
    stock_df = pd.read_csv(StringIO(stock_info))
    # Vectorised, strict parse of the 'YYYY-MM-DD' dates (no per-row strptime).
    stock_df['Date'] = pd.to_datetime(stock_df['Date'], format='%Y-%m-%d')
    stock_df = stock_df.set_index('Date')

    if not remove_zero_volume:
        return stock_df

    return stock_df[stock_df['Volume'] > 0]

In [22]:
# Fetch the price table for symbol '0001.HK'; no start/end date is passed,
# and rows with zero volume are dropped (remove_zero_volume defaults to True).
df = get_yahoo_finance_data('0001.HK')
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-09-08,101.5000,102.6000,101.5000,102.2000,5459200,101.46500
2016-09-07,103.0000,103.0000,102.0000,102.1000,4926700,101.36572
2016-09-06,102.7000,103.2000,102.0000,102.2000,5209500,101.46500
2016-09-05,103.3000,103.9000,102.8000,103.2000,3620100,102.45781
2016-09-02,100.0000,103.8000,99.8000,102.8000,12236700,102.06069
2016-09-01,99.7000,99.7000,98.7000,99.5500,6533800,98.83406
2016-08-31,98.7500,99.7000,98.6000,99.7000,3698700,98.98298
2016-08-30,99.3000,99.7000,98.7000,99.1500,4041600,98.43694
2016-08-29,99.3000,99.3000,97.8000,98.8500,2644900,98.13909
2016-08-26,99.5000,99.6000,98.5500,98.7500,3664600,98.03981


In [31]:
from talib import abstract
# Sort by the Date index in ascending order so the indicator below is
# computed over rows running oldest -> newest.
df = df.sort_index()
# Duplicate 'Close' under a lowercase name; presumably talib's abstract API
# looks the price series up by the key 'close' -- confirm against TA-Lib docs.
df['close'] = df['Close']
# RSI with timeperiod=7; in the output below the first seven rows have no
# RSI value.
df['RSI'] = abstract.RSI(df, timeperiod=7)
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close,RSI,close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2000-01-04,71.4718,72.1865,70.0424,70.0424,3194400,37.37045,,70.0424
2000-01-05,66.8261,67.8982,64.8607,65.2180,6058500,34.79644,,65.2180
2000-01-06,65.7541,66.1114,60.7510,62.0018,10440400,33.08047,,62.0018
2000-01-07,62.8952,63.6099,61.8231,63.2525,6049700,33.74777,,63.2525
2000-01-10,65.3967,66.4688,63.7886,63.7886,5195400,34.03380,,63.7886
2000-01-11,65.3967,66.1114,63.9673,65.3967,6175800,34.89178,,65.3967
2000-01-12,64.3246,65.9327,63.4312,64.3246,5453800,34.31977,,64.3246
2000-01-13,64.6820,64.8607,63.0739,63.4312,3499800,33.84311,25.333184,63.4312
2000-01-14,63.6099,64.1460,61.6444,62.5378,3903500,33.36644,23.505016,62.5378
2000-01-17,63.6099,63.6099,62.3592,63.2525,3106900,33.74777,28.332004,63.2525
