# Install Libs

In [None]:
# !pip install MetaTrader5

# Libs

In [1]:
import MetaTrader5 as mt5
import pandas as pd
from tqdm.notebook import tqdm
import datetime
import time

# Get all Brazilian stocks

In [2]:
# Connect to the MetaTrader 5 terminal; nothing below can work without it.
if not mt5.initialize():
    print("initialize() failed, error code =", mt5.last_error())
    quit()

# symbols_get() returns None (not an empty tuple) when the request fails,
# so check it explicitly instead of failing later with an opaque TypeError.
symbols = mt5.symbols_get()
if symbols is None:
    raise RuntimeError(f"symbols_get() failed, error code = {mt5.last_error()}")

symbol_names = [s.name for s in symbols]

In [3]:
def stock_conditions(s):
    """Return True when symbol name *s* matches the ticker pattern kept here.

    A name is accepted when all of the following hold:

    * it contains neither ``'$'`` nor ``'@'``;
    * its last three characters are not all digits;
    * it is either 5 characters long and ends in one of ``'3'``-``'8'``,
      or 6 characters long and ends in ``'11'``.

    The result is always a real ``bool``; names that pass the general checks
    but match neither suffix rule yield ``False`` rather than ``None``.
    """
    if '$' in s or '@' in s:
        return False
    # Also rejects the empty string, for which all() is vacuously true.
    if all(ch.isdigit() for ch in s[-3:]):
        return False
    if len(s) == 5:
        return s[-1] in ('3', '4', '5', '6', '7', '8')
    if len(s) == 6:
        return s[-2:] == '11'
    return False

In [4]:
# Keep only the names accepted by stock_conditions.
symbol_names = list(filter(stock_conditions, symbol_names))

In [8]:
# Keep only symbols that currently have a session close, a bid, an ask and
# more than 100 session deals.
filtered_symbol_names = []
for s in tqdm(symbol_names):
    mt5.symbol_select(s)
    # presumably gives the terminal time to load quotes for the newly
    # selected symbol -- TODO confirm the delay is still needed
    time.sleep(1)

    # Read one snapshot so all four fields come from the same instant;
    # symbol_info() returns None when the symbol cannot be read.
    info = mt5.symbol_info(s)
    if info is None:
        continue

    if info.session_close > 0 and info.bid > 0 and info.ask > 0 and info.session_deals > 100:
        filtered_symbol_names.append(s)

  0%|          | 0/1063 [00:00<?, ?it/s]

In [10]:
# From here on, work only with the symbols collected in filtered_symbol_names.
symbol_names = filtered_symbol_names

# Download data

In [11]:
mt5_timeframe = mt5.TIMEFRAME_D1
bars_to_download = 2000


def _rates_to_frame(rates):
    """Convert an MT5 rates array into a DataFrame indexed by bar time."""
    frame = pd.DataFrame(rates)
    frame['time'] = pd.to_datetime(frame['time'], unit='s')
    return frame.set_index('time')


# download historical data for each symbol
dfs = {}
for symbol_name in tqdm(symbol_names):
    print(symbol_name, end="\r")
    # select symbol
    mt5.symbol_select(symbol_name)
    time.sleep(1)

    # first request: the most recent bars, counted back from now
    rates = mt5.copy_rates_from(symbol_name, mt5_timeframe, datetime.datetime.now(), bars_to_download)
    # copy_rates_from() returns None on error; an empty array means no data
    if rates is None or len(rates) == 0:
        continue

    chunks = [_rates_to_frame(rates)]
    # Anchor on the OLDEST bar received so the next request reaches further
    # back in time; using min() makes this independent of the array order.
    oldest_time = int(rates['time'].min())

    # keep requesting older bars until the terminal has nothing older to give
    while True:
        rates = mt5.copy_rates_from(symbol_name, mt5_timeframe, oldest_time - 1, bars_to_download)
        if rates is None or len(rates) == 0:
            break

        chunk_oldest = int(rates['time'].min())
        # no progress: the terminal returned nothing older than we have
        if chunk_oldest >= oldest_time:
            break

        chunks.append(_rates_to_frame(rates))
        oldest_time = chunk_oldest

    # stitch the pages together in chronological order without repeated bars
    df = pd.concat(chunks)
    df = df[~df.index.duplicated(keep='first')].sort_index()

    # add DataFrame to dictionary
    dfs[symbol_name] = df

# disconnect from MetaTrader5
mt5.shutdown()

# display the first 5 rows of the first DataFrame (if anything was downloaded)
if dfs:
    first_symbol = next(iter(dfs))
    print(first_symbol)
    print(dfs[first_symbol].head())
else:
    print("No historical data was downloaded.")

  0%|          | 0/473 [00:00<?, ?it/s]

ABCB41
             open   high    low  close  tick_volume  spread  real_volume
time                                                                    
2018-04-16  13.78  13.79  13.44  13.58         2965       1       410400
2018-04-17  13.58  13.78  13.47  13.71         1826       1       297100
2018-04-18  13.70  14.08  13.70  14.03         2278       1       362400
2018-04-19  14.12  14.12  13.92  14.00         1292       1       188500
2018-04-20  14.03  14.06  13.75  13.93         2187       1       312600


# Filtering stocks with 1200 or more daily bars

In [16]:
# Keep only the symbols whose history has at least 1200 rows without missing values.
final_df = {
    symbol: history
    for symbol, history in dfs.items()
    if len(history.dropna()) >= 1200
}

In [17]:
# Continue with the filtered dictionary only (dfs and final_df now name the same object).
dfs = final_df

# Keep only dates from 2015-01-01 onward

In [18]:
# Daily calendar running from 2015-01-01 up to today.
start_date = pd.Timestamp('2015-01-01')
end_date = pd.Timestamp.today()
calendar_dates = pd.date_range(start_date, end_date, freq='D')

# Empty frame whose index is the calendar; used as the left side of the join.
calendar_df = pd.DataFrame(index=calendar_dates)

# Align every symbol to the calendar: days without a bar become NaN rows and
# bars outside the calendar range are dropped. The dictionary is updated in
# place (existing keys only), so iterating over a snapshot of the keys is safe.
for symbol_name in list(dfs):
    dfs[symbol_name] = calendar_df.join(dfs[symbol_name], how='left')

# Fill missing values and transform to weekly data

In [19]:
# How each daily column is folded into one weekly bar.
weekly_aggregation = {
    'open': 'first',
    'high': 'max',
    'low': 'min',
    'close': 'last',
    'tick_volume': 'sum',
}

for symbol_name, daily_df in dfs.items():
    # Linearly interpolate missing values, in place, across all columns.
    # NOTE(review): if the index holds every calendar day, weekend/holiday rows
    # are interpolated values too, so the weekly 'last' close and the summed
    # tick_volume include them -- confirm this is intended.
    daily_df.interpolate(method='linear', inplace=True)

    # Collapse to weekly bars labelled and closed on the right bin edge, then
    # replace the daily frame stored for this symbol.
    dfs[symbol_name] = daily_df.resample('W', label='right', closed='right').agg(weekly_aggregation)


# Save results

In [20]:
import pickle
import pandas as pd

# Persist the dictionary of weekly DataFrames, then read it straight back.
weekly_pickle_path = 'df_dict.pickle'

with open(weekly_pickle_path, 'wb') as sink:
    pickle.dump(dfs, sink)

# Only load pickle files this notebook wrote itself; unpickling untrusted
# data can execute arbitrary code.
with open(weekly_pickle_path, 'rb') as source:
    loaded_df_dict = pickle.load(source)

In [21]:
# Keep only the symbols with at least 260 weekly rows free of missing values.
final_df = {
    symbol: weekly
    for symbol, weekly in loaded_df_dict.items()
    if len(weekly.dropna()) >= 260
}

In [22]:
import pickle
import pandas as pd

# Save the dictionary of dataframes to a file
# Persist the filtered dictionary, then read it straight back.
final_pickle_path = 'final_df.pickle'

with open(final_pickle_path, 'wb') as sink:
    pickle.dump(final_df, sink)

# Only load pickle files this notebook wrote itself; unpickling untrusted
# data can execute arbitrary code.
with open(final_pickle_path, 'rb') as source:
    loaded_df_dict = pickle.load(source)


In [23]:
# Work from the dictionary that was just read back from final_df.pickle.
final_df = loaded_df_dict

In [24]:
# One Series of closing prices per symbol, each named after its symbol.
close_by_symbol = [frame['close'].rename(key) for key, frame in final_df.items()]

if not close_by_symbol:
    raise ValueError("final_df is empty: no symbol is available to build the close-price table")

# Left-join every remaining symbol onto the first one, so the first symbol's
# index defines the rows of the resulting table.
df = close_by_symbol[0]
for close_series in close_by_symbol[1:]:
    df = pd.merge(df, close_series, how="left", left_index=True, right_index=True)

df.index.name = 'Date'

In [27]:
# Export the rows dated from 2018-05-06 (inclusive) to 2023-04-16 (exclusive).
in_export_window = (df.index >= "2018-05-06") & (df.index < "2023-04-16")
df[in_export_window].to_excel("../data/base_dados2.xlsx")