In [33]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt

import scipy.optimize as optimization

In [34]:
#getting stock data
stocks_ex = ['AAPL', 'WMT', 'TSLA', 'GE', 'AMZN', 'DB']
# stocks = ['DSKY.ME', 'NKNC.ME', 'MTSS.ME', 'IRAO.ME', 'SBER.ME', 'AFLT.ME']
start_date_ex = '2018-01-01'
end_date_ex = '2024-01-01'

def download_data(stocks, start_date, end_date):
  df = {}
  for stock in stocks:
    ticker = yf.Ticker(stock)
    df[stock] = ticker.history(start = start_date, end = end_date)['Close']
  return pd.DataFrame(df)


In [35]:
def show_data(data):
  data.plot(figsize = (18,5))
  plt.show()

In [36]:
def calculate_return(data):
  log_return = np.log(data / data.shift(1))
  return log_return[1:]

In [37]:
COEFF = 1

In [38]:
def show_statistics(returns):
  #annual return using mean
  print(returns.mean() * COEFF)

  #remember covariance is matrix so it will return that
  print(returns.cov() * COEFF)

In [39]:
#caculating annual portfolio return and overall volatility of portfolio
def show_mean_variance(returns, weights):
  portfolio_return = np.sum(returns.mean() * weights) * COEFF
  portfolio_volatility = np.sqrt(np.dot(weights.T, np.dot(returns.cov() * COEFF, weights)))
  print("Expected portfolio mean(return):", portfolio_return)
  print("Expected portfolio volatility(standerd deviation):", portfolio_volatility)


In [8]:
def generate_portfolios(returns):
  portfolio_means = []
  portfolio_risks = []
  portfolio_weights = []

  for i in range(10000):
    w = np.random.random(len(stocks))
    w /= np.sum(w)
    portfolio_weights.append(w)
    # returns.mean() -  среднее по одной акции на протяжении всего времени
    portfolio_means.append(np.sum(returns.mean() * w) * COEFF)
    portfolio_risks.append(np.sqrt(np.dot(w.T, np.dot(returns.cov() * COEFF, w))))

  return np.array(portfolio_weights), np.array(portfolio_means), np.array(portfolio_risks)

In [9]:
def show_portfolios(returns,volatilities):
  plt.figure(figsize = (18,6))
  plt.scatter(volatilities, returns, c = returns / volatilities, marker='o')
  plt.grid(True)
  plt.xlabel('Expected Volatility')
  plt.ylabel('Expected Return')
  plt.colorbar(label = 'Sharp Ratio')
  plt.show()

In [10]:
def statistics(weights, returns):
  portfolio_return = np.sum(returns.mean() * weights) * COEFF
  portfolio_volatility = np.sqrt(np.dot(weights.T, np.dot(returns.cov() * COEFF, weights)))
  return np.array([portfolio_return, portfolio_volatility, portfolio_return / portfolio_volatility])


In [11]:
#scipy find min of given function so we wil find min of -f(x) to get max of f(x)
#f(x)=0 this is the function to minimize
# минимальный риск при заданной степени дохода
def min_function_sharp(weights, returns):
  return statistics(weights, returns)[1]
# было return -statistics(weights, returns)[2]


In [12]:
DOHOD = 0.000824740721
# Пусть будет 100% в год (хотим удвоить).  
# x^365 = 2, x - доходность акций
# x = 1.00190084
# Берём логарифм. ln(x) = 0.000824740721
# Модель заменяет её сложную динамику на линейную модель. То есть берёём вместо x среднюю доходность и делаем вид, что она постоянно такая

In [13]:
def optimize_portfolio(weights, returns):
  constrains = [
    {
      'type': 'eq',
      'fun': lambda x: np.sum(x) - 1
    },
    {
      'type': 'eq',
      'fun': lambda x, returns: np.sum(returns.mean() * x) * COEFF - DOHOD, # lambda x : np.sum(x) - 1
      'args': (returns,)
    },
  ]
  #the weight can be 1 at most for single stock or weights can have value with in 0 and 1
  bounds = tuple((0, 1) for _ in range(len(stocks)))
  return optimization.minimize(
    fun = min_function_sharp,
    x0 = weights[0],
    args = returns,
    method = 'SLSQP',
    bounds = bounds,
    constraints = constrains
  )


In [14]:
def print_optimal_portfolio(optimum, returns):
  print("Optimal portfolio:", optimum['x'].round(3))
  print("Expected return, volatility and sharp ratio:", statistics(optimum['x'].round(3), returns))

In [15]:
def show_optimal_portfolios(opt, rets, portfolio_rets, portfolio_vols):
  plt.figure(figsize = (10,6))
  plt.scatter(
    portfolio_vols,
    portfolio_rets,
    c = portfolio_rets / portfolio_vols,
    marker='o'
  )
  plt.grid(True)
  plt.xlabel('Expected Volatility')
  plt.ylabel('Expected Return')
  plt.colorbar(label = 'Sharp Ratio')
  plt.plot(
    statistics(opt['x'], rets)[1],
    statistics(opt['x'], rets)[0],
    'g*',
    markersize = 20
  )
  plt.show()

In [16]:
# if __name__ == '__main__':
#   dataset = (download_data())
#   show_data(dataset)
#   log_daily_returns = calculate_return(dataset)
#   #show_statistics(log_daily_returns)

#   pweights, means, risks = generate_portfolios(log_daily_returns)
#   show_portfolios(means, risks)
#   optimum = optimize_portfolio(pweights, log_daily_returns)
#   print_optimal_portfolio(optimum, log_daily_returns)
#   show_optimal_portfolios(optimum, log_daily_returns, means, risks)

In [17]:
!pip install pytickersymbols
!pip install get-all-tickers



In [22]:
from pytickersymbols import PyTickerSymbols
# from get_all_tickers import get_tickers as gt

# list_of_tickers = gt.get_tickers()

# print(list_of_tickers[0])

stock_data = PyTickerSymbols()
countries = stock_data.get_all_countries()
indices = stock_data.get_all_indices()
industries = stock_data.get_all_industries()

# the naming conversation is get_{index_name}_{exchange_city}_{yahoo or google}_tickers
dax_google = stock_data.get_dax_frankfurt_google_tickers()
dax_yahoo = stock_data.get_dax_frankfurt_yahoo_tickers()
sp100_yahoo = stock_data.get_sp_100_nyc_yahoo_tickers()
sp500_google = stock_data.get_sp_500_nyc_google_tickers()
dow_yahoo = stock_data.get_dow_jones_nyc_yahoo_tickers()
# there are too many combination. Here is a complete list of all getters
all_ticker_getter_names = list(filter(
   lambda x: (
         x.endswith('_google_tickers') or x.endswith('_yahoo_tickers')
   ),
   dir(stock_data),
))
# print(all_ticker_getter_names)

['get_aex_frankfurt_google_tickers', 'get_aex_frankfurt_yahoo_tickers', 'get_aex_london_google_tickers', 'get_aex_london_yahoo_tickers', 'get_aex_moscow_google_tickers', 'get_aex_moscow_yahoo_tickers', 'get_aex_nyc_google_tickers', 'get_aex_nyc_yahoo_tickers', 'get_bel_20_frankfurt_google_tickers', 'get_bel_20_frankfurt_yahoo_tickers', 'get_bel_20_london_google_tickers', 'get_bel_20_london_yahoo_tickers', 'get_bel_20_moscow_google_tickers', 'get_bel_20_moscow_yahoo_tickers', 'get_bel_20_nyc_google_tickers', 'get_bel_20_nyc_yahoo_tickers', 'get_cac_40_frankfurt_google_tickers', 'get_cac_40_frankfurt_yahoo_tickers', 'get_cac_40_london_google_tickers', 'get_cac_40_london_yahoo_tickers', 'get_cac_40_moscow_google_tickers', 'get_cac_40_moscow_yahoo_tickers', 'get_cac_40_nyc_google_tickers', 'get_cac_40_nyc_yahoo_tickers', 'get_cac_mid_60_frankfurt_google_tickers', 'get_cac_mid_60_frankfurt_yahoo_tickers', 'get_cac_mid_60_london_google_tickers', 'get_cac_mid_60_london_yahoo_tickers', 'get_ca

In [29]:
import csv

stocks = []

with open('stocks.csv', newline='') as File:  
    reader = csv.reader(File)
    for row in reader:
        if row[0] != 'Symbol':
            stocks.append(row[0])

# print(stocks)
            

['A', 'AA', 'AACT', 'AAN', 'AAP', 'AAT', 'AB', 'ABBV', 'ABEV', 'ABG', 'ABM', 'ABR', 'ABR^D', 'ABR^E', 'ABR^F', 'ABT', 'AC', 'ACA', 'ACCO', 'ACEL', 'ACHR', 'ACI', 'ACM', 'ACN', 'ACP', 'ACP^A', 'ACR', 'ACR^C', 'ACR^D', 'ACRE', 'ACV', 'ADC', 'ADC^A', 'ADCT', 'ADM', 'ADNT', 'ADT', 'ADX', 'AEE', 'AEFC', 'AEG', 'AEL', 'AEL^A', 'AEL^B', 'AEM', 'AEO', 'AER', 'AES', 'AESI', 'AEVA', 'AFB', 'AFG', 'AFGB', 'AFGC', 'AFGD', 'AFGE', 'AFL', 'AFT', 'AG', 'AGCO', 'AGD', 'AGI', 'AGL', 'AGM', 'AGM^C', 'AGM^D', 'AGM^E', 'AGM^F', 'AGM^G', 'AGO', 'AGR', 'AGRO', 'AGS', 'AGTI', 'AGX', 'AHH', 'AHH^A', 'AHL^C', 'AHL^D', 'AHL^E', 'AHR', 'AHT', 'AHT^D', 'AHT^F', 'AHT^G', 'AHT^H', 'AHT^I', 'AI', 'AIF', 'AIG', 'AIN', 'AIO', 'AIR', 'AIRC', 'AIT', 'AIU', 'AIV', 'AIZ', 'AIZN', 'AJG', 'AJX', 'AJXA', 'AKA', 'AKO/A', 'AKO/B', 'AKR', 'AL', 'AL^A', 'ALB', 'ALB^A', 'ALC', 'ALCC', 'ALE', 'ALEX', 'ALG', 'ALIT', 'ALK', 'ALL', 'ALL^B', 'ALL^H', 'ALL^I', 'ALL^J', 'ALLE', 'ALLG', 'ALLY', 'ALSN', 'ALTG', 'ALTG^A', 'ALTM', 'ALUR', '

In [43]:
import random
#             создать подвыборку
#             наивный вариант
def random_subsample(num, stocks):
    my_stocks = []
    for i in range(num):
        my_stocks.append(random.choice(stocks))
    return my_stocks

In [44]:
# stock_data = PyTickerSymbols()
# german_stocks = stock_data.get_stocks_by_index('DAX')
# uk_stocks = stock_data.get_stocks_by_index('FTSE 100')

import datetime as DT
import pandas as pd


if __name__ == '__main__':

    start_date = DT.datetime(2017, 4, 4)
    end_date = DT.datetime(2024, 4, 4)

    dates = pd.date_range(
        min(start_date, end_date),
        max(start_date, end_date)
    ).strftime('%Y-%m-%d').tolist()
    
    periods = []
    stocks_num = []
    for i in range(len(dates)):
        start_date = dates[i]
        for end_date in dates[i:]:
            
            date1 = datetime.strptime(start_date, '%Y-%m-%d')
            date2 = datetime.strptime(end_date, '%Y-%m-%d')

            num_days = (date2 - date1).days
            
            periods.append(num_days)
            
            num = random.randint(1, len(stocks))
            stocks_num.append(num)
            
            my_stocks = random_subsample(num, stocks)
            
            dataset = (download_data(my_stocks, start_date, end_date))
            log_daily_returns = calculate_return(dataset)
            show_statistics(log_daily_returns)
            pweights, means, risks = generate_portfolios(log_daily_returns)
            optimum = optimize_portfolio(pweights, log_daily_returns)
            
    
#     строим графики зависимости от 1) длительности периода, 2) выборки акций, 3) количества акций

TAP: No price data found, symbol may be delisted (1d 2017-04-04 -> 2017-04-04)
STVN: Data doesn't exist for startDate = 1491278400, endDate = 1491278400
BBN: No price data found, symbol may be delisted (1d 2017-04-04 -> 2017-04-04)
ABBV: No price data found, symbol may be delisted (1d 2017-04-04 -> 2017-04-04)
ANF: No price data found, symbol may be delisted (1d 2017-04-04 -> 2017-04-04)
MFA^B: No timezone found, symbol may be delisted


KeyboardInterrupt: 