In [None]:
!pip install -q pandas-datareader yahooquery investpy fundamentus

In [None]:
### Setting up libraries
from google.colab import drive
import numpy as np
import pandas as pd
from datetime import datetime
from tqdm.notebook import tqdm
import time
from pandas_datareader import data as pdr
import yahooquery as yq
import investpy as inv
import fundamentus as fd
import warnings

# Silence every warning for the rest of the session. Note that this also hides
# deprecation / future warnings raised by the libraries imported above.
warnings.filterwarnings("ignore")

# Connect to Google Drive (My data lake)
# The drive is mounted at /content/drive; the CSV paths used in this notebook
# all live under that directory.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
### Read the ranking from my data lake and keep only the rows refreshed today

# Today's date as 'YYYY-MM-DD' text, compared below against `last_update`
curr_date = datetime.today().date().isoformat()

df = pd.read_csv('/content/drive/My Drive/data_lake/alpha_raking.csv')

# Keep only rows whose last_update equals today.
# If no row carries today's date, df ends up empty (no error is raised here).
df = df.loc[df['last_update'].eq(curr_date)]

df.head(3)

Unnamed: 0,ticker,qtd_dias,s_p,acwx,emm,cmmdt,usd_real,mkt,size,value,momnt,liq,return,vol,alpha,r_score,last_update,Div. Yield Med
0,ISAE4.SA,4027,0.02,0.03,0.035,0.005,-0.016,0.182,0.022,0.041,0.001,0.014,3.825,0.01835,0.00092,0.09,2025-06-15,8.97
1,SAPR4.SA,4027,0.031,0.032,0.023,-0.012,-0.052,0.242,0.057,0.07,0.01,0.047,3.263,0.02216,0.00069,0.109,2025-06-15,5.23
2,FRAS3.SA,4027,0.031,0.012,0.012,0.013,-0.021,0.179,0.095,-0.007,-0.004,0.071,2.79,0.03954,0.00067,0.021,2025-06-15,3.06


In [None]:
def tidy_fin(ticker, data, freq, hist_prices):
  """Build a per-year table of margins and price multiples for one ticker.

  Parameters
  ----------
  ticker : str
      Symbol used as key into ``data.asset_profile`` and written to the
      ``ticker`` column of the result.
  data : object
      Must expose ``asset_profile`` (mapping ticker -> dict with an
      ``'industry'`` entry) and ``get_financial_data(types=..., frequency=...)``
      returning a DataFrame with ``asOfDate`` and ``periodType`` columns.
      In this notebook it is a ``yahooquery.Ticker``.
  freq : str
      Passed through unchanged as the ``frequency`` argument.
  hist_prices : pandas.DataFrame
      Indexed by year, with a ``'last'`` column holding the price used for the
      multiples of that year.

  Returns
  -------
  pandas.DataFrame
      Indexed by year (ascending) with columns ``ticker``, ``sector``,
      ``gross_margin``, ``net_margin``, ``psr``, ``pvp`` and ``pl``. Only years
      present in both the financial data and ``hist_prices`` are returned.
  """
  # Get sector (the 'industry' entry of the asset profile)
  sector = data.asset_profile[ticker]['industry']

  # Load the statement items, dropping rows whose periodType is 'TTM'
  fin = (
      data
      .get_financial_data(
          types = ['TotalRevenue', 'GrossProfit', 'NetIncome', 'CommonStockEquity', 'ShareIssued'],
          frequency = freq
      )
      .query("periodType != 'TTM'")
      .copy()
  )

  # When GrossProfit is not reported, fall back to TotalRevenue
  # (which makes gross_margin equal to 1 for that ticker)
  if 'GrossProfit' not in fin.columns:
    fin['GrossProfit'] = fin['TotalRevenue']

  # Re-index the statements by the calendar year of their reference date
  fin.index = pd.to_datetime(fin['asOfDate']).dt.year

  # Join with the prices that were passed in (not a notebook-level global) and
  # derive every ratio from the merged rows themselves, so each ratio always
  # uses the price and the statement values of the same row.
  return (
      fin
      .merge(
          hist_prices[hist_prices.index.isin(fin.index)],
          left_index=True,
          right_index=True
      )
      .assign(
          ticker = ticker,
          sector = sector,
          gross_margin = lambda d: d['GrossProfit'] / d['TotalRevenue'],
          net_margin = lambda d: d['NetIncome'] / d['TotalRevenue'],
          psr = lambda d: d['last'] / (d['TotalRevenue'] / d['ShareIssued']),
          pvp = lambda d: d['last'] / (d['CommonStockEquity'] / d['ShareIssued']),
          pl = lambda d: d['last'] / (d['NetIncome'] / d['ShareIssued'])
      )
      [['ticker', 'sector', 'gross_margin', 'net_margin', 'psr', 'pvp', 'pl']]
      .sort_index(ascending=True)
  )



def compare(x):
  """Convert a scraped indicator value to float.

  The placeholders '-' and '' (empty string) are mapped to 0.0; any other
  value is passed to float().
  """
  is_placeholder = (x == '-') or (x == '')
  return float(x) if not is_placeholder else 0.0

In [None]:
# Select tickers to get fundamentalist information  #'DEXP3.SA'
lista = df['ticker']

# One single-row frame per ticker is collected here and concatenated once after
# the loop, instead of re-copying a growing DataFrame on every iteration.
# If the loop stops on an error, the rows gathered so far remain in fund_rows.
fund_rows = []

for ticker in tqdm(lista):
  ### Get the Historical Company Performance - Gross and Net Margin
  #################################################################
  # fundamentus is queried with the last 3 characters of the ticker removed
  # (the '.SA' suffix on the tickers used in this notebook)
  df_fd = fd.get_papel(ticker[:-3])

  # Get Historical Prices & Dividends
  yf_data = yq.Ticker(ticker)
  asst_data = yf_data.history(period = '10y').reset_index(0)
  asst_data['year'] = pd.to_datetime(asst_data.index.to_series(), errors='coerce', utc=True).dt.year

  # Without a dividends column, treat every row as paying nothing
  if 'dividends' not in asst_data.columns:
    asst_data['dividends'] = 0

  ### Get the last price of each year
  time.sleep(0.2)
  last_prices = asst_data.groupby('year')['close'].agg(['last'])

  # Yearly margins / multiples from the balance sheet (Balanco Patrimonial),
  # collapsed to their median over the available years
  hist_perf = (
      tidy_fin(
          ticker = ticker,
          data = yf_data,
          freq = 'a',
          hist_prices = last_prices
      )
      .groupby(['ticker', 'sector'])
      .median()
  )

  ### Get the Historical Dividend Yield
  #####################################

  ### Calculate the dividends paid per year
  sun_div = asst_data[asst_data.dividends != 0].groupby('year')['dividends'].agg(['sum'])

  ### Grouping Last Price with Dividends Sum
  asst_div_data = pd.concat([last_prices, sun_div], axis=1)

  asst_div_data['yield'] = asst_div_data['sum'] / asst_div_data['last']

  # Median yearly yield, in percent
  hist_div = round(asst_div_data['yield'].median() * 100, 2)


  ### Final Dataset - Fundamentalist Performance & Index
  ######################################################

  # Take the first row explicitly by position. Plain `series[0]` on these
  # non-integer-indexed objects relies on a deprecated positional fallback.
  perf_row = hist_perf.iloc[0]
  fd_row = df_fd.iloc[0]

  # Values sliced with [:-1] have their last character removed before parsing.
  # NOTE(review): presumably a trailing '%' in the fundamentus text - confirm.
  # NOTE(review): the /10 and /100 divisors rescale the raw fundamentus values;
  # confirm them against the current fundamentus output format.
  final_data_fund = pd.DataFrame(
      {
      'ticker': ticker,
      'Setor': hist_perf.index[0][1],
      'Mg. Bruta Med.': [round(perf_row['gross_margin'] * 100, 2)],
      'Mg. Bruta Hj': [compare(fd_row['Marg_Bruta'][:-1])],
      'Mg. Net Med': [round(perf_row['net_margin'] * 100, 2)],
      'Mg. Net Hj':[compare(fd_row['Marg_Liquida'][:-1])],
      'ROIC': [compare(fd_row['ROIC'][:-1])],
      'ROE': [compare(fd_row['ROE'][:-1])],
      'Div. Liq / Patr': [compare(fd_row['Div_Br_Patrim'][:-1]) / 10],
      'Liq Corrente': [compare(fd_row['Liquidez_Corr']) / 100],
      'Div. Yield Med': [hist_div],
      'Div. Yield Hj': [compare(fd_row['Div_Yield'][:-1])],
      'PSR Med': [round(perf_row['psr'], 2)],
      'PSR Hj':[compare(fd_row['PSR']) / 100],
      'PVP Med':[round(perf_row['pvp'], 2)],
      'PVP Hj':[compare(fd_row['PVP']) / 100],
      'PL Med':[round(perf_row['pl'], 2)],
      'PL Hj':[compare(fd_row['PL']) / 100]
      }
  )

  time.sleep(0.2)

  fund_rows.append(final_data_fund)

# Dataframe holding the fundamental indexes of every ticker (empty if none ran)
data = pd.concat(fund_rows, ignore_index=True) if fund_rows else pd.DataFrame()

  0%|          | 0/37 [00:00<?, ?it/s]

In [None]:
### SECTOR + INDUSTRY FILTERING & BLOCKLIST
###########################################

### BLOCKLIST - INDUSTRIES
# Matched exactly (case-sensitive) against the 'Setor' column.
blocklist_industry = [
    'Real Estate Services',
    'Real Estate—Development',
    'Real Estate - Development',
    'Residential Construction',
    'Airlines',
    'Specialty Retail',
    'Internet Retail',
    'Apparel Retail',
    'Packaged Foods',
    'Restaurants',
    'Oil & Gas Integrated',
    'Oil & Gas E&P',
    'Oil & Gas Refining & Marketing',
    'Oil & Gas Equipment & Services',
    'Steel',
]

### BLOCKLIST - COMPANIES
# Matched exactly (case-sensitive) against the 'ticker' column, so every entry
# must use the same upper-case '.SA' suffix as the tickers in `data`.
blocklist_companies = [
    'PETR4.SA',  ## Too much controversy involved
    'VALE3.SA',  ## Brumadinho and Mariana
    'AZUL4.SA',  ## Bad sector
    'GOLL4.SA',  ## Bad sector
    'JBSS3.SA',
    'AALR3.SA',  ## Top 1 worst ESG
    'PNVL3.SA',  ## Top 2 worst ESG
    'TRIS3.SA',  ## Top 3 worst ESG
    'BRAP3.SA',  ## Top 4 worst ESG (was 'BRAP3.sa', which could never match)
    'LAND3.SA',  ## Top 5 worst ESG
]

### FILTER
# Drop every row whose industry or ticker is block-listed
data = data[
    (~data['Setor'].isin(blocklist_industry))
    & (~data['ticker'].isin(blocklist_companies))
].reset_index(drop=True)

data

Unnamed: 0,ticker,Setor,Mg. Bruta Med.,Mg. Bruta Hj,Mg. Net Med,Mg. Net Hj,ROIC,ROE,Div. Liq / Patr,Liq Corrente,Div. Yield Med,Div. Yield Hj,PSR Med,PSR Hj,PVP Med,PVP Hj,PL Med,PL Hj
0,ISAE4.SA,Utilities - Independent Power Producers,59.92,46.3,44.81,43.1,11.4,17.5,0.7,3.77,8.97,10.2,2.79,1.81,0.97,0.75,5.73,4.27
1,SAPR4.SA,Utilities - Regulated Water,58.63,56.2,22.6,34.1,8.2,19.7,0.5,1.47,5.23,4.5,1.18,1.53,0.77,0.89,5.23,4.49
2,FRAS3.SA,Auto Parts,31.55,33.6,8.73,7.5,12.3,15.4,1.2,2.19,3.06,1.9,1.26,1.64,2.5,3.42,13.51,22.26
3,RANI3.SA,Packaging & Containers,41.21,38.6,20.57,19.4,13.2,22.8,1.2,3.51,5.13,8.8,1.14,1.09,1.81,1.28,5.79,5.62
4,SLCE3.SA,Farm Products,31.23,31.7,11.57,8.8,12.3,15.2,1.2,1.44,4.44,3.0,1.02,0.95,2.05,1.67,9.16,10.97
5,TAEE11.SA,Utilities - Regulated Electric,73.72,63.5,50.47,42.4,11.2,23.1,1.4,1.85,8.27,8.1,11.3,2.95,5.53,1.6,22.36,6.94
6,SHUL4.SA,Auto Parts,25.89,26.2,13.01,13.4,11.1,18.1,0.5,3.1,4.59,7.8,0.97,0.96,1.58,1.29,7.97,7.12
7,PSSA3.SA,Insurance - Diversified,100.0,100.0,6.82,7.9,75.1,20.4,0.0,1.13,4.37,3.0,0.61,0.94,1.47,2.47,8.85,12.12
8,BBSE3.SA,Insurance - Diversified,100.0,0.0,145.12,0.0,0.0,74.1,0.0,0.0,5.87,10.4,13.17,0.0,7.16,6.09,9.51,8.22
9,VIVT3.SA,Telecom Services,43.63,43.8,9.79,10.1,7.7,8.4,0.3,0.96,8.49,5.5,1.53,1.71,1.13,1.43,14.74,17.01


In [None]:
######################################
### Filter All TOP Stocks - And Cheap!
######################################

# Rows with exactly this 'Setor' label may report 0 for some indicators and
# still pass the corresponding test.
# NOTE(review): the 'Setor' values displayed in this notebook are English
# industry names, so this exemption may never match - confirm.
is_fin = data['Setor'] == 'Intermediários Financeiros'

# Gross and net margins: historical median and current value
margins_ok = (
    (data['Mg. Bruta Med.'] >= 15)
    & ((data['Mg. Bruta Hj'] >= 15) | (is_fin & (data['Mg. Bruta Hj'] == 0)))
    & (data['Mg. Net Med'] > 5)
    & ((data['Mg. Net Hj'] > 5) | (is_fin & (data['Mg. Net Hj'] == 0)))
)

# Return on invested capital / equity
returns_ok = (
    ((data['ROIC'] > 8) | (is_fin & (data['ROIC'] == 0)))
    & (data['ROE'] > 9)
)

# Leverage and current liquidity
balance_ok = (
    (data['Div. Liq / Patr'] <= 2)
    & ((data['Liq Corrente'] >= 1) | (is_fin & (data['Liq Corrente'] == 0)))
)

# Dividend yield: historical median and current value
dividends_ok = (data['Div. Yield Med'] >= 3) & (data['Div. Yield Hj'] >= 3)

# "Cheap": current PSR and PVP both below 85% of their historical medians
discount_ok = (
    (data['PSR Hj'] < data['PSR Med'] * 0.85)
    & (data['PVP Hj'] < data['PVP Med'] * 0.85)
)

# Positive P/L, and either P/L <= 12 or below 85% of its historical median
earnings_ok = (
    (data['PL Med'] > 0)
    & (data['PL Hj'] > 0)
    & ((data['PL Hj'] <= 12) | (data['PL Hj'] < data['PL Med'] * 0.85))
)

final_data = data[
    margins_ok & returns_ok & balance_ok & dividends_ok & discount_ok & earnings_ok
].reset_index(drop=True)

final_data

Unnamed: 0,ticker,Setor,Mg. Bruta Med.,Mg. Bruta Hj,Mg. Net Med,Mg. Net Hj,ROIC,ROE,Div. Liq / Patr,Liq Corrente,Div. Yield Med,Div. Yield Hj,PSR Med,PSR Hj,PVP Med,PVP Hj,PL Med,PL Hj
0,ISAE4.SA,Utilities - Independent Power Producers,59.92,46.3,44.81,43.1,11.4,17.5,0.7,3.77,8.97,10.2,2.79,1.81,0.97,0.75,5.73,4.27
1,TAEE11.SA,Utilities - Regulated Electric,73.72,63.5,50.47,42.4,11.2,23.1,1.4,1.85,8.27,8.1,11.3,2.95,5.53,1.6,22.36,6.94
2,AGRO3.SA,Farm Products,44.72,39.4,30.17,22.8,11.1,14.3,0.6,1.58,6.16,7.5,2.26,1.58,1.3,0.99,9.65,6.91
3,GRND3.SA,Footwear & Accessories,41.21,47.6,25.31,26.7,16.5,18.0,0.0,4.47,3.88,8.8,2.59,1.87,1.49,1.26,10.51,7.0
4,FLRY3.SA,Diagnostics & Research,27.5,27.6,7.46,7.8,11.8,12.0,1.0,2.29,3.58,7.8,1.37,0.91,2.03,1.36,17.39,11.35


In [None]:
####################################################
### Filter All TOP Stocks - But Expensive - TO WATCH
####################################################

# Rows with exactly this 'Setor' label may report 0 for some indicators and
# still pass the corresponding test.
# NOTE(review): the 'Setor' values displayed in this notebook are English
# industry names, so this exemption may never match - confirm.
is_fin = data['Setor'] == 'Intermediários Financeiros'

# Gross and net margins: historical median and current value
margins_ok = (
    (data['Mg. Bruta Med.'] >= 20)
    & ((data['Mg. Bruta Hj'] >= 20) | (is_fin & (data['Mg. Bruta Hj'] == 0)))
    & (data['Mg. Net Med'] > 6)
    & ((data['Mg. Net Hj'] > 6) | (is_fin & (data['Mg. Net Hj'] == 0)))
)

# Return on invested capital / equity
returns_ok = (
    ((data['ROIC'] > 8) | (is_fin & (data['ROIC'] == 0)))
    & (data['ROE'] > 9)
)

# Leverage and current liquidity
balance_ok = (
    (data['Div. Liq / Patr'] <= 2)
    & ((data['Liq Corrente'] >= 1) | (is_fin & (data['Liq Corrente'] == 0)))
)

# Dividend yield: median and current at least 4, current strictly below 20
dividends_ok = (
    (data['Div. Yield Med'] >= 4)
    & (data['Div. Yield Hj'] >= 4)
    & (data['Div. Yield Hj'] < 20)
)

# No valuation test here: these are the quality names regardless of price
to_watch = data[margins_ok & returns_ok & balance_ok & dividends_ok].reset_index(drop=True)

# Leave out what is already in final_data (the index is not reset afterwards)
to_watch = to_watch[~to_watch.ticker.isin(final_data.ticker)]

to_watch

Unnamed: 0,ticker,Setor,Mg. Bruta Med.,Mg. Bruta Hj,Mg. Net Med,Mg. Net Hj,ROIC,ROE,Div. Liq / Patr,Liq Corrente,Div. Yield Med,Div. Yield Hj,PSR Med,PSR Hj,PVP Med,PVP Hj,PL Med,PL Hj
1,SAPR4.SA,Utilities - Regulated Water,58.63,56.2,22.6,34.1,8.2,19.7,0.5,1.47,5.23,4.5,1.18,1.53,0.77,0.89,5.23,4.49
2,RANI3.SA,Packaging & Containers,41.21,38.6,20.57,19.4,13.2,22.8,1.2,3.51,5.13,8.8,1.14,1.09,1.81,1.28,5.79,5.62
4,SHUL4.SA,Auto Parts,25.89,26.2,13.01,13.4,11.1,18.1,0.5,3.1,4.59,7.8,0.97,0.96,1.58,1.29,7.97,7.12
5,KEPL3.SA,Farm & Heavy Construction Machinery,29.81,28.0,14.41,10.9,23.2,24.2,0.4,2.01,7.37,9.8,1.02,0.95,2.64,2.11,7.85,8.74
7,CSMG3.SA,Utilities - Regulated Water,39.42,43.0,15.18,17.3,15.1,16.8,0.7,1.22,4.83,6.1,0.99,1.25,0.9,1.22,6.54,7.23
8,LEVE3.SA,Auto Parts,28.0,27.8,14.53,10.4,34.7,56.4,1.1,1.34,4.88,8.7,1.01,0.85,3.67,4.62,6.92,8.21
9,VLID3.SA,Specialty Business Services,35.71,35.0,6.57,14.0,17.9,17.5,0.3,2.83,4.12,10.6,0.59,0.95,0.85,1.19,9.85,6.78


In [None]:
###############################################
### Filter Only Cheap Stocks - But not that BAD
###############################################

# Rows with exactly this 'Setor' label may report 0 for some indicators and
# still pass the corresponding test.
# NOTE(review): the 'Setor' values displayed in this notebook are English
# industry names, so this exemption may never match - confirm.
is_fin = data['Setor'] == 'Intermediários Financeiros'

# Gross and net margins: historical median and current value
margins_ok = (
    (data['Mg. Bruta Med.'] >= 15)
    & ((data['Mg. Bruta Hj'] >= 15) | (is_fin & (data['Mg. Bruta Hj'] == 0)))
    & (data['Mg. Net Med'] > 1)
    & ((data['Mg. Net Hj'] > 1) | (is_fin & (data['Mg. Net Hj'] == 0)))
)

# Return on invested capital / equity
returns_ok = (
    ((data['ROIC'] >= 1) | (is_fin & (data['ROIC'] == 0)))
    & (data['ROE'] >= 1)
)

# Leverage and current liquidity
balance_ok = (
    (data['Div. Liq / Patr'] <= 2)
    & ((data['Liq Corrente'] >= 0.9) | (is_fin & (data['Liq Corrente'] == 0)))
)

# Dividend yield: median and current at least 2, current at most 20
dividends_ok = (
    (data['Div. Yield Med'] >= 2)
    & (data['Div. Yield Hj'] >= 2)
    & (data['Div. Yield Hj'] <= 20)
)

# Multiples: bounded historical medians, current value at most 90% of the median
psr_ok = (data['PSR Med'] <= 4) & (data['PSR Hj'] <= data['PSR Med'] * 0.9)
pvp_ok = (data['PVP Med'] <= 4) & (data['PVP Hj'] <= data['PVP Med'] * 0.9)
pl_ok = (
    (data['PL Med'] > 0)
    & (data['PL Med'] <= 20)
    & (data['PL Hj'] > 0)
    & (data['PL Hj'] <= data['PL Med'] * 0.9)
)

cheap_to_watch = data[
    margins_ok & returns_ok & balance_ok & dividends_ok & psr_ok & pvp_ok & pl_ok
].reset_index(drop=True)

# Leave out tickers already present in final_data or to_watch
already_listed = (
    cheap_to_watch.ticker.isin(final_data.ticker)
    | cheap_to_watch.ticker.isin(to_watch.ticker)
)
cheap_to_watch = cheap_to_watch[~already_listed].reset_index(drop=True)

cheap_to_watch

Unnamed: 0,ticker,Setor,Mg. Bruta Med.,Mg. Bruta Hj,Mg. Net Med,Mg. Net Hj,ROIC,ROE,Div. Liq / Patr,Liq Corrente,Div. Yield Med,Div. Yield Hj,PSR Med,PSR Hj,PVP Med,PVP Hj,PL Med,PL Hj
0,SMTO3.SA,Specialty Chemicals,33.43,23.6,21.48,13.8,6.3,16.6,1.7,2.27,2.8,6.9,1.6,0.86,1.76,1.03,8.28,6.22


In [None]:
###########################################
### FINAL STOCK PICKING - IT MUST BE MANUAL
###########################################

# NOTE(review): this rebinds final_data to every row of `data` (one row per
# ticker, first occurrence kept), replacing the filtered selection built
# earlier in the notebook - confirm that this is intended.
# Manual additions that used to be chained in here (kept for reference):
#   .append(to_watch)
#   .append(cheap_to_watch[cheap_to_watch.ticker == 'ROMI3.SA'])
final_data = data.drop_duplicates(subset='ticker', keep="first").reset_index(drop=True)

final_data

Unnamed: 0,ticker,Setor,Mg. Bruta Med.,Mg. Bruta Hj,Mg. Net Med,Mg. Net Hj,ROIC,ROE,Div. Liq / Patr,Liq Corrente,Div. Yield Med,Div. Yield Hj,PSR Med,PSR Hj,PVP Med,PVP Hj,PL Med,PL Hj
0,ISAE4.SA,Utilities - Independent Power Producers,59.92,46.3,44.81,43.1,11.4,17.5,0.7,3.77,8.97,10.2,2.79,1.81,0.97,0.75,5.73,4.27
1,SAPR4.SA,Utilities - Regulated Water,58.63,56.2,22.6,34.1,8.2,19.7,0.5,1.47,5.23,4.5,1.18,1.53,0.77,0.89,5.23,4.49
2,FRAS3.SA,Auto Parts,31.55,33.6,8.73,7.5,12.3,15.4,1.2,2.19,3.06,1.9,1.26,1.64,2.5,3.42,13.51,22.26
3,RANI3.SA,Packaging & Containers,41.21,38.6,20.57,19.4,13.2,22.8,1.2,3.51,5.13,8.8,1.14,1.09,1.81,1.28,5.79,5.62
4,SLCE3.SA,Farm Products,31.23,31.7,11.57,8.8,12.3,15.2,1.2,1.44,4.44,3.0,1.02,0.95,2.05,1.67,9.16,10.97
5,TAEE11.SA,Utilities - Regulated Electric,73.72,63.5,50.47,42.4,11.2,23.1,1.4,1.85,8.27,8.1,11.3,2.95,5.53,1.6,22.36,6.94
6,SHUL4.SA,Auto Parts,25.89,26.2,13.01,13.4,11.1,18.1,0.5,3.1,4.59,7.8,0.97,0.96,1.58,1.29,7.97,7.12
7,PSSA3.SA,Insurance - Diversified,100.0,100.0,6.82,7.9,75.1,20.4,0.0,1.13,4.37,3.0,0.61,0.94,1.47,2.47,8.85,12.12
8,BBSE3.SA,Insurance - Diversified,100.0,0.0,145.12,0.0,0.0,74.1,0.0,0.0,5.87,10.4,13.17,0.0,7.16,6.09,9.51,8.22
9,VIVT3.SA,Telecom Services,43.63,43.8,9.79,10.1,7.7,8.4,0.3,0.96,8.49,5.5,1.53,1.71,1.13,1.43,14.74,17.01


In [None]:
### Persist the final selection as CSV inside Google Drive (without the index)
final_data.to_csv(
    '/content/drive/My Drive/data_lake/stock_valuation.csv',
    index=False,
    encoding='utf-8',
)

In [None]:
### Join the ranking rows (df) with the fundamentals (data), one match per ticker
# Default inner join: only tickers present in both frames are kept.
int_data = df.merge(data, on="ticker")

In [None]:
# Show ranking columns next to the fundamentals for a few hand-picked tickers
picked_tickers = ['EGIE3.SA', 'LEVE3.SA', 'SHUL4.SA']
view_cols = [
    'ticker', 'mkt', 'size', 'value', 'momnt', 'liq',
    'Mg. Bruta Med.', 'Mg. Bruta Hj', 'Mg. Net Med', 'Mg. Net Hj',
    'Liq Corrente', 'Div. Yield Hj',
    'PSR Med', 'PSR Hj', 'PVP Med', 'PVP Hj', 'PL Med', 'PL Hj',
]

int_data.loc[int_data.ticker.isin(picked_tickers), view_cols]

Unnamed: 0,ticker,mkt,size,value,momnt,liq,Mg. Bruta Med.,Mg. Bruta Hj,Mg. Net Med,Mg. Net Hj,Liq Corrente,Div. Yield Hj,PSR Med,PSR Hj,PVP Med,PVP Hj,PL Med,PL Hj
6,SHUL4.SA,0.229,0.098,0.051,-0.016,0.052,25.89,26.2,13.01,13.4,3.1,7.8,0.97,0.96,1.58,1.29,7.97,7.12
12,EGIE3.SA,0.222,-0.009,0.023,-0.0,-0.01,50.78,53.6,27.14,29.6,1.27,5.7,2.59,2.84,3.81,2.73,11.2,9.81
21,LEVE3.SA,0.229,0.109,0.011,-0.014,0.063,28.0,27.8,14.53,10.4,1.34,8.7,1.01,0.85,3.67,4.62,6.92,8.21
