In [1]:
import datetime
from StringIO import StringIO

import pandas as pd
from talib import abstract

import time
import urllib
import urllib2

from BeautifulSoup import BeautifulSoup


def get(url, data_list=None, timeout=10, max_try=3):
    """
    Fetch a URL with a GET request, retrying when an attempt fails.

    :param url: address to request
    :param data_list: optional query parameters in a form accepted by
        urllib.urlencode (e.g. a list of (key, value) pairs); when given they
        are url-encoded and appended to url after a '?'
    :param timeout: seconds handed to urlopen as its timeout; the same value
        is used as the pause between two attempts
    :param max_try: maximum number of attempts before giving up
    :return: the response body as returned by response.read()
    :raise Exception: when none of the attempts succeeded
    """
    if data_list:
        url = "{}?{}".format(url, urllib.urlencode(data_list))
    query = urllib2.Request(url)
    current_try = 0
    while current_try < max_try:
        current_try += 1
        try:
            response = urllib2.urlopen(query, timeout=timeout)
            try:
                return response.read()
            finally:
                # Release the connection even when read() fails midway.
                response.close()
        except Exception as e:
            # Best-effort retry: report the failure and try again.
            print(e)
            # Only wait when another attempt follows; sleeping before the
            # final raise would just delay the error.
            if current_try < max_try:
                time.sleep(timeout)
    raise Exception("Cannot open page {}".format(url))


def get_yahoo_finance_data(symbol, start_date=None, end_date=None, remove_zero_volume=True):
    """
    Download the price history of a symbol from the Yahoo Finance CSV endpoint.

    :param symbol: stock symbol used in yahoo finance
    :param start_date: optional first day of the range as YYYY-MM-DD, e.g. 2012-03-15
    :param end_date: optional last day of the range, same format as start_date
    :param remove_zero_volume: if True, rows whose Volume is zero are dropped
    :return: a pandas DataFrame indexed by Date holding the remaining columns
        of the downloaded CSV (Open, High, Low, Close, Volume, Adj Close in
        the output recorded in this notebook)
    :raise ValueError: if start_date or end_date is not formatted as YYYY-MM-DD
    """
    data_list = [('s', symbol)]
    # Each bound is sent as three query keys: (month, day, year).
    for date_text, (month_key, day_key, year_key) in ((start_date, 'abc'), (end_date, 'def')):
        if not date_text:
            continue
        # Validate up front so a malformed date fails with a clear ValueError
        # instead of an IndexError from the split below.
        datetime.datetime.strptime(date_text, '%Y-%m-%d')
        year, month, day = date_text.split('-')
        # The month is sent zero-based (January is 0).
        data_list.append((month_key, int(month) - 1))
        data_list.append((day_key, day))
        data_list.append((year_key, year))
    # presumably g=d selects daily rows -- confirm against the API
    data_list.append(('g', 'd'))
    data_list.append(('ignore', '.csv'))

    # NOTE(review): confirm this endpoint is still being served.
    url = "http://chart.finance.yahoo.com/table.csv"
    stock_info = get(url=url, data_list=data_list)
    stock_df = pd.read_csv(StringIO(stock_info))
    # Vectorised parse; the explicit format keeps the strict YYYY-MM-DD check.
    stock_df['Date'] = pd.to_datetime(stock_df['Date'], format='%Y-%m-%d')
    stock_df = stock_df.set_index('Date')

    if not remove_zero_volume:
        return stock_df

    return stock_df[stock_df['Volume'] > 0]

In [2]:
# Load the full available price history for 0001.HK (no date bounds given).
df = get_yahoo_finance_data('0001.HK')
# Preview the first rows only instead of rendering the whole history.
df.head(10)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-09-30,98.5000,99.1500,98.0500,98.6000,6769200,98.60000
2016-09-29,99.4000,100.2000,99.2500,99.7000,4066000,99.70000
2016-09-28,98.3000,99.3500,98.0000,99.3500,4793900,99.35000
2016-09-27,99.0000,99.2500,98.5500,99.0000,4681700,99.00000
2016-09-26,99.9000,99.9500,99.0000,99.2000,3059500,99.20000
2016-09-23,100.4000,101.4000,99.8500,100.3000,3470300,100.30000
2016-09-22,100.6000,101.9000,100.2000,100.4000,4875900,100.40000
2016-09-21,99.0500,99.7000,98.4000,99.7000,7077500,99.70000
2016-09-20,100.4000,101.0000,99.4000,99.6000,4264100,99.60000
2016-09-19,101.0000,101.3000,99.8000,101.2000,4079100,101.20000


In [3]:
# `abstract` is already imported in the first cell, so it is not re-imported here.
# Sort by date so the rows run oldest-first before computing the indicator.
df = df.sort_index()
# TA-Lib's abstract API looks up the lowercase 'close' column.
df['close'] = df['Close']
# MACD periods spelled out by name rather than passed positionally.
macd_df = abstract.MACD(df, fastperiod=7, slowperiod=14, signalperiod=9)
# The leading rows are NaN (presumably the indicator's warm-up window).
macd_df.head(10)

Unnamed: 0_level_0,macd,macdsignal,macdhist
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-04,,,
2000-01-05,,,
2000-01-06,,,
2000-01-07,,,
2000-01-10,,,
2000-01-11,,,
2000-01-12,,,
2000-01-13,,,
2000-01-14,,,
2000-01-17,,,
