In [1]:
# Importar librerías 
import DataFunctions
import ModelFunctions
import seaborn as sns
import yfinance as yf
import matplotlib.pyplot as plt
from datetime import datetime
from valuation import Companies, Models
from sklearn.impute import KNNImputer
import statsmodels.api as sm
import numpy as np
import pandas as pd
from datetime import timedelta
#from ydata_profiling import ProfileReport

In [2]:
# Load the raw financial statements and derive the ticker universe from them
income_statement = DataFunctions.load_full_excel(
    'Data/Income/Income_Statement.xlsx'
)
balance_statement = DataFunctions.load_full_excel(
    'Data/Balance/Balance_Statement.xlsx'
)
sp500 = DataFunctions.assets(income_statement=income_statement)

# Adjusted close prices for every ticker in the universe.
# To analyse a different period, change the start/end dates below.
prices = yf.download(
    tickers=sp500,
    start='2018-09-01',
    end='2023-09-23',
    progress=False,
)['Adj Close']

# Date filtering and temporal corrections
prices_fiscal = DataFunctions.prices_date(
    balance_statement=balance_statement,
    prices=prices,
    sp500=sp500,
)

# Clean DataFrame consolidating both financial statements and the prices
financial_info = DataFunctions.clean_df(
    balance_statement=balance_statement,
    income_statement=income_statement,
    sp500=sp500,
    prices_fiscal=prices_fiscal,
)

# Data in tabular format
data_table = DataFunctions.tabular_df(
    financial_info=financial_info,
    sp500=sp500,
)

# Financial ratios computation
stock = Companies(data_table)
stock.get_ratios()

# EDA (data quality report) to guide the data cleaning
DataFunctions.dqr(stock.ratios)

# Cleaning; the last expression displays the cleaned ratios
stock.clean_ratios()
stock.clean

Unnamed: 0,Stock,fiscalDateEnding,PER,PBV,Acid_test,ATR,CCC,ROA,DER,NPM,EM,Return
0,A,2023-06-30,117.226398,3.280427,26.970165,0.156505,484.824339,0.027984,0.866805,0.178804,1.866805,1
1,A,2023-03-31,115.687540,3.729464,12.466387,0.159538,473.104821,0.032237,0.946693,0.202067,1.946693,1
2,A,2022-12-31,119.300066,4.168479,76.111111,0.174041,425.001250,0.034941,0.985297,0.200764,1.985297,1
3,A,2022-09-30,108.403263,3.401819,14.888889,0.163201,419.650125,0.031381,1.059320,0.192285,2.059320,0
4,A,2022-06-30,128.247936,3.361065,15.462857,0.151698,416.910303,0.026208,1.041195,0.172762,2.041195,0
...,...,...,...,...,...,...,...,...,...,...,...,...
14,XRAY,2019-12-31,197.198259,1.773366,4.074675,0.101090,520.351878,0.008993,0.990733,0.088959,1.990733,1
15,XRAY,2019-09-30,434.443256,1.833476,26.970165,0.116104,445.294066,0.004220,0.816937,0.036349,1.816937,0
16,XRAY,2019-06-30,450.749762,1.958478,26.970165,0.104921,447.986379,0.004345,0.760390,0.041411,1.760390,1
17,XRAY,2019-03-31,8354.720484,1.679904,26.970165,0.118376,407.148055,0.000201,0.777601,0.001699,1.777601,0


In [3]:
import joblib
from locale import setlocale, LC_TIME

# Working copy of the cleaned ratios. NOTE(review): `database` is not used below;
# the model input is read from 'data.csv', presumably a previous export of it
# (see the commented-out `to_csv`) -- confirm the CSV is in sync with `stock.clean`.
database = stock.clean.copy()

# Pass the path to joblib so it opens AND closes the file itself; wrapping the
# path in a bare `open(...)` left the file handle unclosed.
# SECURITY: joblib/pickle files execute arbitrary code on load -- only load
# model files from a trusted source.
loaded_model = joblib.load('GBC_bagging_model.pkl')

setlocale(LC_TIME, 'en_US.UTF-8')

# Model
#database.to_csv('data.csv')
data = pd.read_csv('data.csv').drop('Unnamed: 0', axis=1)
data['fiscalDateEnding'] = pd.to_datetime(data['fiscalDateEnding'], format='%Y-%m-%d')
# Features are every column except the identifiers and the target ('Return')
data['Yhat'] = loaded_model.predict(data.drop(['Stock', 'fiscalDateEnding', 'Return'], axis=1))
data = data.sort_values(by = 'fiscalDateEnding', ascending = True)

# Prices
prices = stock.df[['Stock','fiscalDateEnding','Adj Close']]
prices = prices.sort_values(by = 'fiscalDateEnding')

# Left join keeps every prediction row even when no price is available for it
data = data.merge(prices, how = 'left', on = ['fiscalDateEnding','Stock'])
data.head()

Unnamed: 0,Stock,fiscalDateEnding,PER,PBV,Acid_test,ATR,CCC,ROA,DER,NPM,EM,Return,Yhat,Adj Close
0,GEN,2018-12-31,-397.673958,0.371745,26.970165,0.071512,906.206897,-0.000935,0.862399,-0.013072,1.86327,0,0,9.880098
1,DISH,2018-12-31,18.623365,0.939512,41.532332,0.396907,209.470823,0.050448,0.862399,0.127103,1.86327,0,0,24.969999
2,DLTR,2018-12-31,103.204542,3.398345,26.970165,0.647207,20.407688,0.032928,0.862399,0.050877,1.86327,0,0,90.32
3,DOV,2018-12-31,134.51558,2.472537,26.970165,0.204184,95.486986,0.018381,0.862399,0.090022,1.86327,0,0,65.714211
4,DOW,2018-12-31,0.0,0.0,26.970165,1.740944,8.31514,0.156812,0.862399,0.090073,1.86327,0,0,0.0


In [4]:
# Seed for the random portfolio draw so the cell gives the same result on re-run
RANDOM_SEED = 42

# Risk-free series: parse the dates, rename the columns to match `data`
# ('Adj Close' -> 'rf', 'Date' -> 'fiscalDateEnding') and forward-fill gaps.
# `.ffill()` replaces the deprecated `fillna(method='ffill')`.
rf = (
    pd.read_csv("^IRX.csv")
    .assign(Date=lambda df: pd.to_datetime(df['Date'], format='%m/%d/%y'))
    .rename(columns={"Adj Close": "rf", "Date": "fiscalDateEnding"})
    .ffill()
)

# Drop any 'rf' column left by a previous run of this cell so the merge does
# not produce 'rf_x' / 'rf_y' when the cell is executed twice.
data = data.drop(columns=["rf"], errors="ignore").merge(rf, on="fiscalDateEnding")

# Mapping from an integer key (0..5) to a fraction decreasing from 1 to 0 in
# steps of 0.2. NOTE(review): not used in this cell -- presumably the share
# placed in the risk-free asset per number of selected stocks; confirm.
investment_rf = {0: 1, 
                 1: 0.8, 
                 2: 0.6, 
                 3: 0.4, 
                 4: 0.2, 
                 5: 0}

initial_capital = 1000000
comision = .00025

#date = '2018-12-31'
date = '2019-06-30'
trade_period = data[data['fiscalDateEnding'] == date]

# Five stocks the model predicts as positive (Yhat == 1) on `date`.
# NOTE(review): Q divides the FULL initial capital by each price, so the five
# positions together are worth 5x `initial_capital` -- confirm this is intended.
traded_portfolio = (
    trade_period[trade_period['Yhat'] == 1]
    .sample(n = 5, random_state = RANDOM_SEED)
    .assign(Q=lambda df: initial_capital / df['Adj Close'])
)
traded_portfolio

Unnamed: 0,Stock,fiscalDateEnding,PER,PBV,Acid_test,ATR,CCC,ROA,DER,NPM,EM,Return,Yhat,Adj Close,rf,Q
910,HAL,2019-06-30,44.068526,0.742454,26.970165,0.63589,1095.398168,0.016848,0.76039,0.026495,1.76039,1,1,21.130648,2.035,47324.626113
779,F,2019-06-30,2.388722,0.303422,5.44001,4.471514,10.813445,0.127023,0.76039,0.028407,1.76039,1,1,8.635567,2.035,115800.159204
951,MOS,2019-06-30,57.368164,0.831718,26.970165,0.236954,1290.522344,0.014498,0.76039,0.061184,1.76039,1,1,23.671152,2.035,42245.514504
894,HPE,2019-06-30,9.777739,0.454098,26.970165,0.792507,129.665231,0.046442,0.76039,0.058601,1.76039,0,1,12.92389,2.035,77376.083454
1118,KMI,2019-06-30,9.107019,0.56124,26.970165,0.380071,198.661113,0.061627,0.76039,0.162146,1.76039,1,1,15.973197,2.035,62604.874969


In [5]:
import AssetAllocation as AA

In [None]:
# Omega allocation for the sampled portfolio, using one year of prices up to
# the portfolio's fiscal date and '^GSPC' as the benchmark.
rf_rate = traded_portfolio.rf.values[0]
tickers = list(traded_portfolio.Stock.values)
tickers.append('^GSPC')
# Normalise to a pandas Timestamp so the timedelta arithmetic below behaves the
# same whatever scalar type `.unique()` returns.
end_date = pd.to_datetime(traded_portfolio.fiscalDateEnding.unique()[0])
start_date = end_date + timedelta(days = -365)
omega_prices = yf.download(tickers, start=start_date, end=end_date, progress=False)['Adj Close']
# Select the benchmark by name rather than by position: the previous
# `columns[-1]` only worked while '^GSPC' happened to be the last column.
omega = AA.asset_allocation(
    data_stocks=omega_prices.drop(columns=['^GSPC']),
    data_benchmark=omega_prices[['^GSPC']],
    rf=rf_rate,
)
omega_weights = omega.omega(n_port=1)



In [110]:
def pick_assets(data: pd.DataFrame, assets: pd.DataFrame, fiscal_date: str,
                n_assets: int = 5, random_state=None):
    """Choose the stocks to hold for ``fiscal_date``.

    Candidates are the rows of ``data`` whose ``fiscalDateEnding`` equals
    ``fiscal_date`` and whose ``Yhat`` is 1. Candidates that already appear in
    ``assets['Stock']`` are kept first; the remaining slots are filled with a
    random draw from the candidates that are NOT already held, so the same
    stock is never returned twice.

    Parameters
    ----------
    data : DataFrame with at least 'Stock', 'fiscalDateEnding' and 'Yhat'.
    assets : DataFrame with a 'Stock' column listing previously selected stocks.
    fiscal_date : date used to filter ``data['fiscalDateEnding']``.
    n_assets : maximum number of stocks to return (default 5).
    random_state : forwarded to ``Series.sample`` for a reproducible draw.

    Returns
    -------
    list
        Up to ``n_assets`` unique stock names, previously held ones first.
        Fewer are returned when there are not enough candidates (instead of
        raising as the unbounded ``sample`` call would).
    """
    is_candidate = (data['fiscalDateEnding'] == fiscal_date) & (data['Yhat'] == 1)
    candidate_stocks = data.loc[is_candidate, 'Stock'].drop_duplicates()

    # Previous Stocks Evaluation: held stocks that are still predicted positive
    held = list(set(assets['Stock']))
    already_held = candidate_stocks.isin(held)
    previous_stocks = candidate_stocks[already_held].tolist()[:n_assets]

    # Add Missing Stocks: draw only from stocks not already held, and never
    # request more than are available (or a negative amount).
    pool = candidate_stocks[~already_held]
    n_new = min(max(n_assets - len(previous_stocks), 0), len(pool))
    add_assets = pool.sample(n=n_new, random_state=random_state).tolist() if n_new > 0 else []

    return previous_stocks + add_assets

def omegaAA(data: pd.DataFrame, assets_lists: list, fiscal_date: str, mkt_idx: str = '^GSPC'):
    """Compute portfolio weights for ``assets_lists`` as of ``fiscal_date``.

    Downloads one year of 'Adj Close' prices ending at ``fiscal_date`` for the
    stocks plus the benchmark ``mkt_idx``, passes them to
    ``AA.asset_allocation(...).omega(n_port=1)`` and, when fewer than five
    weights come back, assigns 20% per missing slot to an 'Rf' entry and scales
    the stock weights by the remainder.

    Parameters
    ----------
    data : DataFrame with 'fiscalDateEnding' and 'rf' columns.
    assets_lists : stock tickers to allocate; not mutated.
    fiscal_date : allocation date (anything ``pd.to_datetime`` accepts).
    mkt_idx : benchmark ticker.

    Returns
    -------
    (omega_weights, assets)
        ``omega_weights`` is the weight sequence (a list when 'Rf' was added);
        ``assets`` is a DataFrame with columns 'Date', 'Stock', 'W'.
    """
    n_slots = 5
    slot_weight = .2
    end_date = pd.to_datetime(fiscal_date)

    # Risk-free rate observed on the fiscal date. Previously the first row of
    # `data` was used whatever its date; that is kept only as a fallback.
    on_date = pd.to_datetime(data['fiscalDateEnding']) == end_date
    rf_rate = data.loc[on_date, 'rf'].iloc[0] if on_date.any() else data.rf.values[0]

    # Omega Optimization
    stocks = list(assets_lists)
    if stocks:
        tickers = stocks + [mkt_idx]
        start_date = end_date + timedelta(days = -365)
        omega_prices = yf.download(tickers, start=start_date, end=end_date, progress=False)['Adj Close']
        # Select columns by NAME and in `stocks` order: the weights are later
        # labelled with `stocks`, so the optimiser must see the columns in that
        # same order, and the benchmark must not depend on column position.
        omega = AA.asset_allocation(data_stocks=omega_prices[stocks],
                                    data_benchmark=omega_prices[[mkt_idx]],
                                    rf=rf_rate)
        omega_weights = omega.omega(n_port=1)
    else:
        # Nothing to optimise: every slot goes to the risk-free entry below.
        omega_weights = []

    # RF: each of the five 20% slots without a weight is given to 'Rf'
    rf_percentage = (n_slots - len(omega_weights)) * slot_weight
    if rf_percentage > 0:
        stocks = stocks + ['Rf']
        omega_weights = np.array(omega_weights) * (1 - rf_percentage)
        omega_weights = np.concatenate((omega_weights, [rf_percentage]))
        omega_weights = list(omega_weights)

    # Assets DF: built column-wise so 'Date' holds real timestamps (the former
    # row-wise build + transpose produced object columns that do not compare
    # equal to a date string). Series inputs pad to the longest, as before.
    assets = pd.DataFrame({'Stock': pd.Series(stocks),
                           'W': pd.Series(list(omega_weights))})
    assets.insert(0, 'Date', end_date)
    return omega_weights, assets

def trade(data: pd.DataFrame, assets: pd.DataFrame, operations: pd.DataFrame, omega_weights: list, fiscal_date: str, initial_capital: int = 1000000):
    """Compute the position change needed to rebalance at ``fiscal_date``.

    Prices are the first available 'Adj Close' row in the seven days starting
    at ``fiscal_date``. The portfolio is valued from the prior holdings in
    ``operations`` (falling back to ``initial_capital`` when that value is 0),
    target quantities are ``W * value / price``, and the trade is target minus
    prior quantity.

    Parameters
    ----------
    data : not used by this function; kept so existing calls keep working.
    assets : DataFrame with 'Date', 'Stock', 'W' (allocation history).
    operations : DataFrame with at least 'Stock' and 'X' (prior quantities).
    omega_weights : weights aligned with the ``fiscal_date`` rows of ``assets``.
    fiscal_date : rebalancing date.
    initial_capital : portfolio value used when there are no prior positions.

    Returns
    -------
    DataFrame indexed by stock with columns 'X' (target quantity),
    'X_1' (prior quantity) and 'Trade' (difference).
    """
    cash_label = 'Rf'  # risk-free placeholder used in `assets`; not a market ticker

    # Data
    # Compare real timestamps: a 'Date' column holding Timestamp objects never
    # equals the plain string `fiscal_date`, which left `tickers_new` empty.
    fiscal_ts = pd.to_datetime(fiscal_date)
    asset_dates = pd.to_datetime(assets['Date'])
    tickers_new = assets.loc[asset_dates == fiscal_ts, 'Stock'].values
    previous_date = asset_dates[asset_dates != fiscal_ts].max()  # NaT when there is no history
    tickers_previous = assets.loc[asset_dates == previous_date, 'Stock'].values
    end_date = fiscal_ts + timedelta(days=7)
    tickers = np.unique(np.array(list(tickers_new) + list(tickers_previous)))
    # Only real tickers are sent to the price provider
    market_tickers = [ticker for ticker in tickers if ticker != cash_label]

    if market_tickers:
        adj_close = yf.download(tickers=market_tickers, start = fiscal_date, end = end_date, progress = False)['Adj Close']
        # A single-ticker download may come back as a Series; normalise it
        if isinstance(adj_close, pd.Series):
            adj_close = adj_close.to_frame(name=market_tickers[0])
        prices_new = adj_close.iloc[0].to_frame()
        prices_new.columns = ['Price']
    else:
        prices_new = pd.DataFrame(columns=['Price'], dtype=float)
    # Cash is priced at 1 so its quantity 'X' is simply the amount held
    prices_new.loc[cash_label] = 1.0

    # X_T-1
    Xt_prior = operations.set_index('Stock')['X'].to_frame()
    Xt_prior = Xt_prior.merge(prices_new, left_index = True, right_index = True, how = 'left')
    Xt_prior['Pos'] = Xt_prior['X'] * Xt_prior['Price']
    Val_Port = Xt_prior['Pos'].sum()
    if Val_Port == 0:
        # No prior positions: start from the initial capital
        Val_Port = initial_capital
    Xt_prior = Xt_prior['X'].to_frame()
    Xt_prior = Xt_prior.rename(columns = {'X':'X_1'})

    ## Valuation
    #X_T
    Xt = pd.DataFrame( omega_weights, index = tickers_new, columns=['W'])
    Xt = Xt.merge(prices_new, left_index = True, right_index = True, how = 'left')
    Xt["X"] = Xt['W'] * Val_Port / Xt['Price']
    Xt = Xt['X'].to_frame()

    #X
    # Outer join so both newly bought and fully sold stocks appear; a missing
    # side means a quantity of 0
    X = Xt.join(Xt_prior, how='outer').fillna(0.0)
    X['Trade'] = X['X'] - X['X_1']

    # Trade functions
    # Buy

    # Sell

    return X

In [111]:
# Start from an empty allocation history and an empty operations log
assets = pd.DataFrame(columns=['Date','Stock','W'])
operations = pd.DataFrame(
    columns=['Date','Stock','X','Price','Position','Type']
)

# Run one selection -> allocation -> trade cycle for this fiscal date
fiscal_date = '2019-06-30'
assets_list = pick_assets(
    data=data,
    assets=assets,
    fiscal_date=fiscal_date,
)
omega_weights, assets = omegaAA(
    data=data,
    assets_lists=assets_list,
    fiscal_date=fiscal_date,
)
# Last expression: the resulting trade table is displayed as the cell output
trade(
    data=data,
    assets=assets,
    operations=operations,
    omega_weights=omega_weights,
    fiscal_date=fiscal_date,
)

In [35]:
# Stocks held in the allocation immediately before `fiscal_date`.
# Dates are normalised first: a 'Date' column holding Timestamp objects never
# compares equal to the plain string `fiscal_date`.
previous_fiscal_date = pd.to_datetime(assets['Date'])[
    pd.to_datetime(assets['Date']) != pd.to_datetime(fiscal_date)
].max()
previous = assets[pd.to_datetime(assets['Date']) == previous_fiscal_date]
# 'Rf' is the risk-free placeholder, not a market ticker, so it is excluded
stocks_portfolio_previous = previous[previous['Stock'] != 'Rf']['Stock'].values
if len(stocks_portfolio_previous) > 0:
    # Fixed: the undefined name `stocks_portfolio` raised NameError here.
    # The array is converted to a list before being handed to yfinance.
    yf.download(tickers=list(stocks_portfolio_previous))

Timestamp('2019-06-30 00:00:00')