In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import os
from datetime import datetime

import requests
from bs4 import BeautifulSoup


class TimeFrame:
    ONE_MINUTE = 'M1'
    TICK_DATA = 'T'
    TICK_DATA_LAST = 'T_LAST'
    TICK_DATA_BID = 'T_BID'
    TICK_DATA_ASK = 'T_ASK'


class Platform:
    META_TRADER = 'MT'
    GENERIC_ASCII = 'ASCII'
    EXCEL = 'XLSX'
    NINJA_TRADER = 'NT'
    META_STOCK = 'MS'


class URL:
    META_TRADER = 'https://www.histdata.com/download-free-forex-historical-data/?/metatrader/1-minute-bar-quotes/'
    ASCII_1M = 'https://www.histdata.com/download-free-forex-historical-data/?/ascii/1-minute-bar-quotes/'
    ASCII_TICK_DATA = 'https://www.histdata.com/download-free-forex-historical-data/?/ascii/tick-data-quotes/'
    EXCEL = 'https://www.histdata.com/download-free-forex-historical-data/?/excel/1-minute-bar-quotes/'
    NINJA_TRADER = 'https://www.histdata.com/download-free-forex-historical-data/?/ninjatrader/1-minute-bar-quotes/'
    NINJA_TRADER_LAST_QUOTES = 'https://www.histdata.com/download-free-forex-historical-data/?/ninjatrader/tick-last-quotes/'
    NINJA_TRADER_BID_QUOTES = 'https://www.histdata.com/download-free-forex-historical-data/?/ninjatrader/tick-bid-quotes/'
    NINJA_TRADER_ASK_QUOTES = 'https://www.histdata.com/download-free-forex-historical-data/?/ninjatrader/tick-ask-quotes/'
    META_STOCK = 'https://www.histdata.com/download-free-forex-historical-data/?/metastock/1-minute-bar-quotes/'


def get_prefix_referer(time_frame, platform):
    if time_frame == TimeFrame.TICK_DATA and platform == Platform.GENERIC_ASCII:
        return URL.ASCII_TICK_DATA
    elif time_frame == TimeFrame.TICK_DATA_LAST and platform == Platform.NINJA_TRADER:
        return URL.NINJA_TRADER_LAST_QUOTES
    elif time_frame == TimeFrame.TICK_DATA_BID and platform == Platform.NINJA_TRADER:
        return URL.NINJA_TRADER_BID_QUOTES
    elif time_frame == TimeFrame.TICK_DATA_ASK and platform == Platform.NINJA_TRADER:
        return URL.NINJA_TRADER_ASK_QUOTES
    elif time_frame == TimeFrame.ONE_MINUTE and platform == Platform.GENERIC_ASCII:
        return URL.ASCII_1M
    elif time_frame == TimeFrame.ONE_MINUTE and platform == Platform.META_TRADER:
        return URL.META_TRADER
    elif time_frame == TimeFrame.ONE_MINUTE and platform == Platform.EXCEL:
        return URL.EXCEL
    elif time_frame == TimeFrame.ONE_MINUTE and platform == Platform.NINJA_TRADER:
        return URL.NINJA_TRADER
    elif time_frame == TimeFrame.ONE_MINUTE and platform == Platform.META_STOCK:
        return URL.META_STOCK
    else:
        raise Exception('Invalid combination of time_frame and platform.')


def get_referer(referer_prefix, pair, year, month):
    if month is not None:
        return referer_prefix + '{}/{}/{}'.format(pair.lower(), year, month)
    return referer_prefix + '{}/{}'.format(pair.lower(), year)


def download_hist_data(year='2016',
                       month=None,
                       pair='eurusd',
                       time_frame=TimeFrame.ONE_MINUTE,
                       platform=Platform.GENERIC_ASCII,
                       output_directory='.',
                       verbose=True):

    tick_data = time_frame.startswith('T')
    if (not tick_data) and ((int(year) >= datetime.now().year and month is None) or
                            (int(year) <= datetime.now().year - 1 and month is not None)):
        msg = 'For the current year, please specify month=7 for example.\n'
        msg += 'For the past years, please query per year with month=None.'
        raise AssertionError(msg)

    prefix_referer = get_prefix_referer(time_frame, platform)
    referer = get_referer(prefix_referer, pair.lower(), year, month)

    # Referer is the most important thing here.
    headers = {'Host': 'www.histdata.com',
               'Connection': 'keep-alive',
               'Content-Length': '104',
               'Cache-Control': 'max-age=0',
               'Origin': 'https://www.histdata.com',
               'Upgrade-Insecure-Requests': '1',
               'Content-Type': 'application/x-www-form-urlencoded',
               'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
               'Referer': referer}

    if verbose:
        print(referer)
    r1 = requests.get(referer, allow_redirects=True)
    assert r1.status_code == 200, 'Make sure the website www.histdata.com is up.'

    soup = BeautifulSoup(r1.content, 'html.parser')
    try:
        token = soup.find('input', {'id': 'tk'}).attrs['value']
        assert len(token) > 0
    except:
        raise AssertionError('There is no token. Please make sure your year/month/pair is correct.'
                             'Example is year=2016, month=7, pair=eurgbp')

    data = {'tk': token,
            'date': str(year),
            'datemonth': '{}{}'.format(year, str(month).zfill(2)) if month is not None else str(year),
            'platform': platform,
            'timeframe': time_frame,
            'fxpair': pair.upper()}
    r = requests.post(url='https://www.histdata.com/get.php',
                      data=data,
                      headers=headers)

    assert len(r.content) > 0, 'No data could be found here.'
    if verbose:
        print(data)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    if month is None:
        output_filename = 'DAT_{}_{}_{}_{}.zip'.format(platform, pair.upper(), time_frame, str(year))
    else:
        output_filename = 'DAT_{}_{}_{}_{}.zip'.format(platform, pair.upper(), time_frame,
                                                       '{}{}'.format(year, str(month).zfill(2)))
    output_filename = os.path.join(output_directory, output_filename)
    with open(output_filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
    if verbose:
        print('Wrote to {}'.format(output_filename))
    return output_filename


if __name__ == '__main__':
   
    # print(download_hist_data(year='2019', month=None, platform=Platform.META_TRADER, time_frame=TimeFrame.ONE_MINUTE))
    # print(download_hist_data(year='2018', month=None, platform=Platform.META_STOCK, time_frame=TimeFrame.ONE_MINUTE))
    pass
import pandas as pd
import numpy as np
import zipfile as zf
import os

pairs = ["USDJPY", "EURUSD", "GBPUSD", "USDCHF", "USDCAD", "NZDUSD", "XAUUSD"]
years = [2000, 2000, 2000, 2000, 2000,2005, 2009]
pairs = ['USDJPY']
years = [2000]
last = 7
#last_year = int(datetime.now().year)
for pair,year in zip(pairs,years):
  for y in range(year,2022):
    download_hist_data(year=str(y), pair=pair.lower(),month=None, platform=Platform.META_TRADER, time_frame=TimeFrame.ONE_MINUTE, verbose=False)

  for m in range(1,last):
    download_hist_data(year='2022', pair=pair.lower(),month=str(m), platform=Platform.META_TRADER, time_frame=TimeFrame.ONE_MINUTE, verbose=False)

  
  name = "DAT_MT_"+pair+"_M1_20"
  for i in range(year%100,22):
    strr = name
    if i<10:
      strr+='0'+str(i)+'.zip'
    else :
      strr+=str(i)+'.zip'
    #!unzip strr
    files = zf.ZipFile(strr,'r')
    files.extractall()
    files.close()
    os.remove(strr)
    strr = strr[:-4]
    strr += '.txt'
    os.remove(strr)


  name = "DAT_MT_"+pair+"_M1_20220"
  for i in range(1,last):
    strr = name
    strr+=str(i)+'.zip'
    #!unzip strr
    files = zf.ZipFile(strr,'r')
    files.extractall()
    files.close()
    os.remove(strr)
    strr = strr[:-4]
    strr += '.txt'
    os.remove(strr)

  
  data = pd.read_csv('DAT_MT_'+pair+'_M1_'+str(year)+'.csv', names=["date", "open", "high", "low", "close", "volume"])
  os.remove('DAT_MT_'+pair+'_M1_'+str(year)+'.csv')
  name = "DAT_MT_"+pair+"_M1_20"
  for i in range(year%100+1,22):
    strr = name
    if i<10:
      strr+='0'+str(i)+'.csv'
    else :
      strr+=str(i)+'.csv'
    df = pd.read_csv(strr, names=["date", "open", "high", "low", "close", "volume"])
    data = pd.concat([data,df])
    os.remove(strr)

  name = "DAT_MT_"+pair+"_M1_20220"
  for i in range(1,last):
    strr = name

    strr+=str(i)+'.csv'
    df = pd.read_csv(strr, names=["date", "open", "high", "low", "close", "volume"])
    data = pd.concat([data,df])
    os.remove(strr)


  data.drop('volume', axis=1, inplace=True)
  data.reset_index(inplace=True) 
  print(pair+" shape : ",data.shape)

  data.to_csv(pair+'.csv',index=False)

USDJPY shape :  (7415787, 6)


In [3]:
# pairs = ["USDJPY", "EURUSD", "GBPUSD", "USDCHF", "USDCAD", "NZDUSD", "XAUUSD"]
# timee = ['1','5', '10', '30', '1440', '43200']

# for pair in pairs:
#     for mi in timee:
#         data = pd.read_csv(pair+'.csv')
#         data['index'] +=' '
#         data['index'] += data['date'] 
#         data["date"] = data["index"].apply(lambda x: x.replace('.','-'))
#         data['date']+=':00'
#         data.drop('index',axis=1, inplace=True)
#         data.set_index('date', inplace=True)
#         data.index = pd.DatetimeIndex(data.index)
#         data = data.resample(mi+'Min').agg({'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last'})
#         is_NaN = data.isnull()
#         row_has_NaN = is_NaN.any(axis=1)
#         rows_with_NaN = data[row_has_NaN]
#         data.drop(rows_with_NaN.index,inplace=True)
#         data.reset_index(inplace=True)
#         data.to_csv(pair+' '+mi+'.csv',index=False)


In [4]:
# import os
# for pair in pairs:
#     os.remove(pair+'.csv')
