In [None]:
# source: https://maikros.github.io/yahoo-finance-python/

import requests                  # [handles the http interactions](http://docs.python-requests.org/en/master/) 
from bs4 import BeautifulSoup    # beautiful soup handles the html to text conversion and more
import re                        # regular expressions are necessary for finding the crumb (more on crumbs later)
from datetime import datetime    # string to datetime object conversion
from time import mktime          # mktime transforms datetime objects to unix timestamps

def _get_crumbs_and_cookies(stock):
    """
    get crumb and cookies for historical data csv download from yahoo finance

    The history page of the stock is requested and the crumb is pulled out of
    the page source with a regular expression on the "CrumbStore" entry.

    parameters: stock - short-handle identifier of the company

    returns a tuple of header, crumb and cookie
        header - dict of request headers that was sent with the page request
        crumb  - first crumb string matched in the page source
        cookie - cookies attached to the page response

    raises IndexError if the page source contains no crumb
    """

    url = 'https://finance.yahoo.com/quote/{}/history'.format(stock)

    # Adjacent string literals are used for the User-Agent: a backslash
    # continuation inside the literal would embed the indentation of the
    # following source line in the header value.
    header = {'Connection': 'keep-alive',
              'Expires': '-1',
              'Upgrade-Insecure-Requests': '1',
              'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) '
                            'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'
              }

    # Send the request through the session object itself so the context
    # manager actually owns the connection it closes on exit.
    with requests.session() as session:
        website = session.get(url, headers=header)
        soup = BeautifulSoup(website.text, 'lxml')
        crumb = re.findall('"CrumbStore":{"crumb":"(.+?)"}', str(soup))

        if not crumb:
            # Same exception type as indexing the empty match list would give,
            # but with a message that points at the actual cause.
            raise IndexError(
                'no crumb found in the history page for {!r}'.format(stock))

        return (header, crumb[0], website.cookies)
    
    
def convert_to_unix(date):
    """
    turns a date string into a unix timestamp

    The string is parsed as a naive datetime at midnight and handed to
    ``mktime``, which interprets the value in the machine's local time zone.

    parameters: date - in format (dd-mm-yyyy)

    returns integer unix timestamp

    raises ValueError if date does not match the dd-mm-yyyy format
    """
    parsed = datetime.strptime(date, '%d-%m-%Y')
    local_struct = parsed.timetuple()
    seconds = mktime(local_struct)

    return int(seconds)


def load_csv_data(stock, interval='1d', day_begin='01-03-2018', day_end='28-03-2018'):
    """
    queries yahoo finance api to receive historical data in csv file format

    parameters:
        stock - short-handle identifier of the company

        interval - 1d, 1wk, 1mo - daily, weekly monthly data

        day_begin - starting date for the historical data (format: dd-mm-yyyy)

        day_end - final date of the data (format: dd-mm-yyyy)

    returns the body of the response as one string (the csv text, not split
    into lines)

    raises ValueError if day_begin or day_end is not in dd-mm-yyyy format
    """
    # Dates are converted first so a malformed date fails before any request
    # is sent.
    day_begin_unix = convert_to_unix(day_begin)
    day_end_unix = convert_to_unix(day_end)

    header, crumb, cookies = _get_crumbs_and_cookies(stock)

    url = 'https://query1.finance.yahoo.com/v7/finance/download/{stock}'.format(stock=stock)

    # Passing the query as ``params`` lets requests URL-encode each value;
    # the crumb is scraped text and is not guaranteed to be URL-safe.
    query = {
        'period1': day_begin_unix,
        'period2': day_end_unix,
        'interval': interval,
        'events': 'history',
        'crumb': crumb,
    }

    # Send the request through the session object itself so the context
    # manager actually owns the connection it closes on exit.
    with requests.session() as session:
        website = session.get(url, params=query, headers=header, cookies=cookies)

        return website.text

def text4csv(stock='AAPL', day_begin='01-03-2018', day_end='28-03-2019'):
    """
    downloads historical data for a stock and writes it to a csv file

    parameters:
        stock - short-handle identifier of the company; also used as the
                file name (stock + '.csv')

        day_begin - starting date for the historical data (format: dd-mm-yyyy)

        day_end - final date of the data (format: dd-mm-yyyy)

    returns None; an existing file with the same name is overwritten
    """
    csv_text = load_csv_data(stock, day_begin=day_begin, day_end=day_end)

    # The context manager closes the file even if the write raises.
    with open(stock + '.csv', "w") as csv_file:
        csv_file.write(csv_text)

# date window (dd-mm-yyyy) shared by the download calls in the cells below
day_begin, day_end = '01-03-2014', '28-03-2019'

import pandas as pd
def chk_csv(stock):
    """
    prints the first rows of the csv file saved for a stock

    parameters: stock - short-handle identifier of the company; the file
                        read is stock + '.csv'

    returns None
    """
    csv_path = stock + '.csv'
    preview = pd.read_csv(csv_path).head()
    print(preview)

In [58]:
# download AAPL data for the day_begin..day_end window into AAPL.csv,
# then print the first rows of the saved file
text4csv(stock='AAPL', day_begin=day_begin, day_end=day_end)
chk_csv('AAPL')

         Date       Open       High        Low      Close  Adj Close    Volume
0  2014-03-03  74.774284  75.807144  74.687141  75.394287  66.669823  59695300
1  2014-03-04  75.857140  76.091431  75.395714  75.891426  67.109421  64785000
2  2014-03-05  75.845711  76.392860  75.589996  76.051430  67.250908  50015700
3  2014-03-06  76.112854  76.348572  75.442856  75.821426  67.047539  46372200
4  2014-03-07  75.870003  75.997147  75.150002  75.777145  67.008354  55182400


In [59]:
# download TEAM data for the day_begin..day_end window into TEAM.csv,
# then print the first rows of the saved file
text4csv(stock='TEAM', day_begin=day_begin, day_end=day_end)
chk_csv('TEAM')

         Date       Open       High        Low      Close  Adj Close    Volume
0  2015-12-09  21.000000  21.000000  21.000000  21.000000  21.000000         0
1  2015-12-10  27.670000  28.500000  26.500000  27.780001  27.780001  15007300
2  2015-12-11  27.850000  28.500000  27.480000  27.500000  27.500000   2286100
3  2015-12-14  27.500000  27.860001  26.110001  26.260000  26.260000   1223500
4  2015-12-15  26.389999  26.948999  26.299999  26.500000  26.500000   1086400


In [60]:
# same download-and-preview steps, with the ticker held in a variable so it
# is written only once
stock = 'CRM'
text4csv(stock=stock, day_begin=day_begin, day_end=day_end)
chk_csv(stock)

         Date       Open       High        Low      Close  Adj Close    Volume
0  2014-03-03  60.650002  61.709999  59.820000  61.490002  61.490002   9303400
1  2014-03-04  62.180000  63.369999  62.009998  63.070000  63.070000   7927200
2  2014-03-05  63.070000  63.750000  62.939999  63.680000  63.680000   5464400
3  2014-03-06  63.750000  63.869999  62.730000  63.139999  63.139999   3958800
4  2014-03-07  63.200001  63.360001  59.730000  60.740002  60.740002  11239300


In [61]:
def get_csv_chk(stock):
    """
    downloads the csv for a stock and prints the first rows of the saved file

    The date window is taken from the module-level day_begin and day_end.

    parameters: stock - short-handle identifier of the company

    returns None
    """
    text4csv(stock, day_begin, day_end)
    chk_csv(stock)

# download and preview BABA in one call
get_csv_chk('BABA')

         Date       Open       High        Low      Close  Adj Close  \
0  2014-09-19  92.699997  99.699997  89.949997  93.889999  93.889999   
1  2014-09-22  92.699997  92.949997  89.500000  89.889999  89.889999   
2  2014-09-23  88.940002  90.480003  86.620003  87.169998  87.169998   
3  2014-09-24  88.470001  90.570000  87.220001  90.570000  90.570000   
4  2014-09-25  91.089996  91.500000  88.500000  88.919998  88.919998   

      Volume  
0  271879400  
1   66657800  
2   39009800  
3   32088000  
4   28598000  
