In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from keras.layers import Dense, LSTM
from keras.models import Sequential

# Scrape the current S&P 500 constituents (symbol + GICS sector) from Wikipedia.
# The first table on the page is the constituents table.
sp500_table = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]

# Swap '.' for '-' in the symbols (e.g. 'BF.B' -> 'BF-B') before they are
# passed to yf.download.
# regex=False makes '.' a literal dot regardless of the pandas version's
# default, and .assign() builds a new frame instead of writing into a
# column-subset of the scraped table.
tickers = sp500_table[['Symbol', 'GICS Sector']].assign(
    Symbol=lambda frame: frame['Symbol'].str.replace('.', '-', regex=False)
)

# Mapping: sector name -> list of ticker symbols in that sector.
sector_breakdown = tickers.groupby('GICS Sector')['Symbol'].apply(list).to_dict()

# Flat list of every symbol, in sector order, for the bulk price download.
ticker_list = [
    symbol
    for sector_symbols in sector_breakdown.values()
    for symbol in sector_symbols
]

# Download window: the 60 months (five years) ending today.
# NOTE: `month_ago` is a legacy name; the offset is 60 months, not one.
now = pd.Timestamp.today()
today = now.strftime('%Y-%m-%d')
month_ago = now - pd.DateOffset(months=60)

# auto_adjust=False pins the column layout so an 'Adj Close' group is present
# regardless of the installed yfinance default. Selecting it with a list keeps
# the two-level (price field, ticker) column index.
data = yf.download(ticker_list, start=month_ago, end=today, auto_adjust=False)
data = data[['Adj Close']]

# Forward-fill first so interior gaps reuse the last known price (no look-ahead);
# back-fill afterwards only covers rows before a ticker's first quote.
data = data.ffill().bfill()

# Daily simple returns. Gaps were filled explicitly above, so pct_change's own
# implicit filling is disabled.
returns = data.pct_change(fill_method=None)
# Drop the price-field level so the columns are plain ticker symbols.
returns = returns.droplevel(0, axis=1)

# Equal-weighted daily return per sector: the row-wise mean of the returns of
# all tickers in that sector.
# The loop variable is named `sector_symbols` so it does not overwrite the
# `tickers` DataFrame built earlier in the notebook.
sector_returns = {
    sector: returns[sector_symbols].mean(axis=1)
    for sector, sector_symbols in sector_breakdown.items()
}

# One column per sector, one row per date; rows containing any NaN (such as
# the first row produced by pct_change) are removed.
raw_data = pd.DataFrame(sector_returns).dropna()
# Train one small LSTM per sector. The target is that sector's daily return and
# the features are the same-day returns of all the other sectors.
# `results` ends up with one column per sector holding the model's predictions
# on the held-out rows.
set_epoch = 20  # training epochs for every sector model
sector_predictions = {}

for column in raw_data.columns:
    y = raw_data[column]
    X = raw_data.drop(column, axis=1)

    # Chronological split: shuffle=False keeps the final 33% of rows as the
    # test set in date order, so downstream code that takes the "last N rows"
    # of the predictions really gets the most recent observations.
    X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
        X.to_numpy(),
        y.to_numpy().reshape(-1, 1),
        test_size=0.33,
        shuffle=False,
    )

    # Fit the scalers on the training rows only, then apply them to the test
    # rows, so no test-set statistics leak into training.
    scaler_x = MinMaxScaler()
    scaler_y = MinMaxScaler()
    X_train = scaler_x.fit_transform(X_train_raw)
    X_test = scaler_x.transform(X_test_raw)
    y_train = scaler_y.fit_transform(y_train_raw)
    y_test = scaler_y.transform(y_test_raw)

    # Keras LSTM layers expect 3-D input. Each sample becomes
    # (n_other_sectors, 1), i.e. the recurrent axis runs over the other
    # sectors' returns for that day, not over time.
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

    model = Sequential()
    model.add(LSTM(units=5, return_sequences=True, input_shape=(X_train.shape[1], 1)))
    model.add(LSTM(units=5, return_sequences=False))
    model.add(Dense(units=25))
    model.add(Dense(units=1))

    # Regression with an MSE loss; the classification 'accuracy' metric carries
    # no information for a continuous target, so it is not tracked.
    model.compile(optimizer='adam', loss='mean_squared_error')

    train = model.fit(X_train, y_train, batch_size=16, epochs=set_epoch, validation_split=0.1, verbose=0)
    raw_predictions = model.predict(X_test, verbose=0)
    # Map the predictions back from the scaled range to return units.
    pred = scaler_y.inverse_transform(raw_predictions)

    sector_predictions[column] = pred.flatten()

# Build the frame once from the collected columns, keeping the sector order.
results = pd.DataFrame(sector_predictions, columns=raw_data.columns)

print(results)


  tickers['Symbol'] = tickers['Symbol'].str.replace('.', '-')


[*********************100%%**********************]  503 of 503 completed
     Communication Services  Consumer Discretionary  Consumer Staples  \
0                  0.004715                0.006116          0.003769   
1                 -0.012910               -0.015325         -0.006436   
2                  0.001554                0.003305          0.001649   
3                  0.007480                0.010724          0.004226   
4                  0.008090                0.009452          0.004775   
..                      ...                     ...               ...   
410               -0.017830               -0.021027         -0.008863   
411               -0.018438               -0.021958         -0.009727   
412                0.007900                0.011871          0.004538   
413                0.010882                0.015185          0.006588   
414               -0.001300               -0.000017          0.000401   

       Energy  Financials  Health Care  Industrial

In [None]:
# Per-sector mean of the final 30 prediction rows, with every negative mean
# floored at zero (non-negative means pass through unchanged).
average_results = results.tail(30).mean().clip(lower=0)



Communication Services    7.492996e-06
Consumer Discretionary    2.410326e-05
Consumer Staples          1.092777e-05
Energy                    5.775695e-06
Financials                2.633400e-06
Health Care               4.029103e-07
Industrials               0.000000e+00
Information Technology    2.806135e-06
Materials                 0.000000e+00
Real Estate               1.252185e-05
Utilities                 0.000000e+00
dtype: float32


In [None]:
# Total of the averaged (already non-negative) sector predictions.
sum_results = average_results.sum()

# If every sector average was floored to zero the total is 0 and the division
# below would silently produce an all-NaN allocation. Fail with a clear message.
if not sum_results > 0:
    raise ValueError(
        "Cannot compute allocations: the averaged sector predictions sum to "
        f"{sum_results!r}; at least one sector must have a positive average."
    )

# Each sector's share of the total, so the weights sum to 1.
allocations = average_results / sum_results

# Display the allocation weights
print(allocations)

# Plain NumPy copy of the weights, in the same sector order as `allocations`.
allocations_array = allocations.to_numpy(copy=True)

Communication Services    0.112399
Consumer Discretionary    0.361563
Consumer Staples          0.163923
Energy                    0.086639
Financials                0.039503
Health Care               0.006044
Industrials               0.000000
Information Technology    0.042094
Materials                 0.000000
Real Estate               0.187835
Utilities                 0.000000
dtype: float32


In [None]:
# q: the per-sector allocation weights computed in the previous cell.
q = allocations_array

# p: identity matrix (nested lists of ints) with one row and one column per
# entry of q. It is sized from q rather than typed out by hand, so it stays
# consistent if the number of sectors changes.
# NOTE(review): presumably a one-view-per-sector pick matrix; confirm against
# the code that consumes p and q.
n_sectors = len(q)
p = [
    [1 if row == col else 0 for col in range(n_sectors)]
    for row in range(n_sectors)
]

print(p)
print(q)


[[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]]
[0.11239941 0.36156324 0.16392313 0.08663887 0.03950256 0.0060439
 0.         0.0420937  0.         0.1878352  0.        ]
