<a href="https://colab.research.google.com/github/yorkjong/vistock/blob/feature%2Franking_utils/notebooks/ibd_rs_rating.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Stock Analysis and Ranking with IBD RS Rating, inspired by the Investor's Business Daily (IBD) methodology.

### Install and Setup

#### Install Packages

In [None]:
%pip install "git+https://github.com/yorkjong/vistock.git@ranking_utils"
%pip install requests-cache

Collecting git+https://github.com/yorkjong/vistock.git
  Cloning https://github.com/yorkjong/vistock.git to /tmp/pip-req-build-6lg1uude
  Running command git clone --filter=blob:none --quiet https://github.com/yorkjong/vistock.git /tmp/pip-req-build-6lg1uude
  Resolved https://github.com/yorkjong/vistock.git to commit bbf8cc2109b2fcfb635b557e33860898cf7b3bcb
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting mplfinance (from vistock==0.7.0)
  Downloading mplfinance-0.12.10b0-py3-none-any.whl.metadata (19 kB)
Downloading mplfinance-0.12.10b0-py3-none-any.whl (75 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.0/75.0 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: vistock
  Building wheel for vistock (setup.py) ... [?25l[?25hdone
  Created wheel for vistock: filename=vistock-0.7.0-py3-none-any.whl size=87941 sha256=7bb65fae35d250d69b2c513489714da920d8ba53eac840dc92ad2b4899e0dcbd
  Stored in directory: /tmp/pi

#### Setup and Configuration

In [None]:
# @title Enable DataFrame Formatter
# Switch on Colab's DataFrame formatter for this session. The `data_table`
# module imported here is also used by the ranking cells further down to
# build paginated `DataTable` views.
from google.colab import data_table
data_table.enable_dataframe_formatter()

In [None]:
# @title Enable Requests Cache
# Install a global cache named 'ibd_cache' with `expire_after=3600`
# (presumably seconds, i.e. one hour -- confirm against the requests-cache
# docs). Cells that need live GitHub state wrap their calls in
# `requests_cache.disabled()`.
import requests_cache
requests_cache.install_cache('ibd_cache', expire_after=3600)

In [None]:
# @title GitHub
import base64
import requests
import pandas as pd
from io import StringIO


class GitHub:
    """Small client for the GitHub REST "repository contents" endpoints.

    File and directory paths given to the methods are relative to the
    ``dir`` passed at construction, and every request targets ``branch``.
    HTTP failures are reported with ``print`` and signalled through the
    return value (``None``, ``[]`` or an empty DataFrame); the methods do
    not raise on a non-success status code.
    """

    def __init__(self, repo_owner, repo_name, token, dir='', branch='main'):
        """Store the credentials and build the base "contents" URL.

        Args:
            repo_owner: Account or organisation that owns the repository.
            repo_name: Name of the repository.
            token: Token sent in the ``Authorization`` header.
            dir: Directory inside the repository that all paths are
                relative to; leading and trailing slashes are ignored.
            branch: Branch to read from and to commit to.
        """
        base = 'https://api.github.com/repos'
        dir = dir.strip('/')
        if dir:
            self.base_url = f'{base}/{repo_owner}/{repo_name}/contents/{dir}'
        else:
            self.base_url = f'{base}/{repo_owner}/{repo_name}/contents'
        self.branch = branch
        self.token = token

    def _headers(self):
        """Return the request headers shared by every API call."""
        return {
            'Authorization': f'token {self.token}',
            'Accept': 'application/vnd.github.v3+json',
        }

    def _url(self, path=''):
        """Return the contents URL for `path` (no trailing slash if empty)."""
        path = path.strip('/')
        return f'{self.base_url}/{path}' if path else self.base_url

    @staticmethod
    def _describe_error(response):
        """Return the decoded JSON error body, or the raw text if not JSON."""
        try:
            return response.json()
        except ValueError:
            # Proxies and gateways may answer with HTML or an empty body.
            return response.text

    def _request(self, method, url, headers=None, params=None, json=None):
        """Send a request and return its decoded JSON body.

        Returns:
            The parsed JSON on status 200 or 201; ``None`` on 404 (silently)
            and on any other status (after printing the failure).
        """
        response = requests.request(method, url, headers=headers,
                                    params=params, json=json)
        if response.status_code in (200, 201):
            return response.json()
        if response.status_code != 404:
            print(f"Request failed: {response.status_code} - "
                  f"{self._describe_error(response)}")
        return None

    def file_exists(self, file_path):
        """Tell whether `file_path` exists on the configured branch.

        Returns:
            ``True`` on status 200, ``False`` on 404, and ``None`` (after
            printing the failure) on any other status.
        """
        response = requests.get(self._url(file_path), headers=self._headers(),
                                params={'ref': self.branch})
        if response.status_code == 200:
            return True
        if response.status_code == 404:
            return False
        print(f"Request failed: {response.status_code} - "
              f"{self._describe_error(response)}")
        return None

    def list_filenames(self, dir_path=''):
        """Return the entry names directly inside `dir_path`.

        Returns:
            A list of names, or ``[]`` (after printing a message) when the
            directory is missing, the path is not a directory, or the
            request fails.
        """
        response = requests.get(self._url(dir_path), headers=self._headers(),
                                params={'ref': self.branch})
        if response.status_code == 200:
            entries = response.json()
            if not isinstance(entries, list):
                # A single object instead of an array means the path is a
                # file rather than a directory.
                print(f"'{dir_path}' is not a directory.")
                return []
            return [entry['name'] for entry in entries]
        if response.status_code == 404:
            print(f"Directory '{dir_path}' does not exist.")
        else:
            print(f"Request failed: {response.status_code} - "
                  f"{self._describe_error(response)}")
        return []

    def download_file(self, file_path):
        """Download `file_path` and return its text wrapped in a ``StringIO``.

        Returns:
            A ``StringIO`` with the file text, or ``None`` (after printing a
            message) when the file is missing or cannot be downloaded.
        """
        file_info = self._request('GET', self._url(file_path),
                                  headers=self._headers(),
                                  params={'ref': self.branch})
        if not file_info:
            print(f"File '{file_path}' does not exist. Cannot download.")
            return None

        # Directories come back as a list and have no download URL.
        download_url = (file_info.get('download_url')
                        if isinstance(file_info, dict) else None)
        if not download_url:
            print(f"'{file_path}' is not a downloadable file.")
            return None

        response = requests.get(download_url)
        if response.status_code == 200:
            return StringIO(response.text)
        print(f"Failed to download file: "
              f"{response.status_code} - {response.text}")
        return None

    def download_csv(self, file_path):
        """Download `file_path` and parse it as CSV.

        Returns:
            The parsed DataFrame, or an empty DataFrame when the download
            fails.
        """
        file_content = self.download_file(file_path)
        if file_content is None:
            return pd.DataFrame()
        return pd.read_csv(file_content)

    def upload_file(self, file_path, content):
        """Create `file_path`, or update it if it already exists.

        Args:
            file_path: Destination path relative to the configured directory.
            content: Text content; it is UTF-8 encoded and then base64
                encoded for the request payload.
        """
        url = self._url(file_path)
        headers = self._headers()
        payload = {
            'message': 'Uploading file',
            'content': base64.b64encode(content.encode()).decode(),
            'branch': self.branch,
        }

        # Updating an existing file requires the SHA of the current blob.
        file_info = self._request('GET', url, headers=headers,
                                  params={'ref': self.branch})
        if isinstance(file_info, dict):
            payload['sha'] = file_info.get('sha')

        # PUT request to create or update the file
        self._request('PUT', url, headers=headers, json=payload)

    def upload_df_as_csv(self, file_path, df):
        """Upload a DataFrame as a CSV file (index column omitted).

        A '.csv' extension is appended to `file_path` when it is missing.
        """
        if not file_path.endswith('.csv'):
            file_path += '.csv'
        self.upload_file(file_path, df.to_csv(index=False))

    def remove_file(self, file_path):
        """Delete `file_path` from the configured branch, if it exists."""
        url = self._url(file_path)
        headers = self._headers()

        # One lookup answers "does it exist" and yields the SHA needed for
        # deletion. `ref` is required so the SHA comes from the same branch
        # the delete is applied to, not from the default branch.
        file_info = self._request('GET', url, headers=headers,
                                  params={'ref': self.branch})
        if not isinstance(file_info, dict):
            print(f"File '{file_path}' does not exist. Skipping deletion.")
            return

        payload = {
            'message': 'Deleting file',
            'sha': file_info['sha'],
            'branch': self.branch,
        }
        self._request('DELETE', url, headers=headers, json=payload)

#-------------------------------------------------------------------------------

from google.colab import userdata

# Shared client used by the cells below. It is scoped to the 'data/ibd'
# directory of the YorkJong/stock-reports repository (default branch 'main');
# the token is read from Colab's `userdata` store under the key
# 'GithubToken.stock-reports'.
github = GitHub(
    repo_owner='YorkJong',
    repo_name='stock-reports',
    token=userdata.get('GithubToken.stock-reports'),
    dir='data/ibd',
)

In [None]:
# @title DataFrame Operations

def is_taiwan_stock_df(df):
    """Return True if `df` appears to hold Taiwan-listed tickers.

    Only the first value of the 'Ticker' column is inspected. It counts as a
    Taiwan ticker when it carries a '.TW' / '.TWO' suffix or consists solely
    of digits. Checking the suffix first keeps codes that contain letters
    (e.g. '00631L.TW', '00945B.TW') from being misclassified.

    Returns:
        bool: False for an empty frame.
    """
    if df.empty:
        return False
    ticker = str(df['Ticker'].iloc[0])
    if ticker.endswith(('.TW', '.TWO')):
        return True
    return ticker.isdigit()

def add_name_column(df):
    """Return `df` with a 'Name' column placed right after 'Ticker'.

    The names come from ``tw.stock_name`` applied to each ticker. The input
    frame is returned unchanged (same object) when it already has a 'Name'
    column, has no 'Ticker' column, is empty, or does not look like a
    Taiwan-stock frame according to ``is_taiwan_stock_df``.

    The input frame is never modified; when a column is added, a copy is
    returned.
    """
    columns = df.columns.tolist()
    if 'Name' in columns or 'Ticker' not in columns:
        return df
    if df.empty or not is_taiwan_stock_df(df):
        return df

    # Work on a copy so the caller's frame is left untouched and the column
    # is written to a real frame rather than to a slice of one.
    result = df.copy()
    names = result['Ticker'].apply(tw.stock_name)
    result.insert(columns.index('Ticker') + 1, 'Name', names)
    return result


def update_tickers_with_names(df, ticker_column, name_separator=','):
    """
    Replace ticker codes in a DataFrame column with their stock names, in place.

    Parameters:
    - df: The DataFrame containing ticker codes.
    - ticker_column: The name of the column containing ticker codes.
    - name_separator: Separator used both to split a cell into tickers and to
      join the resulting names (default is comma for multiple tickers).

    Each string cell is split on the separator, every piece is passed to
    ``tw.stock_name``, and the results are joined back with the same
    separator. Cells that are not strings (e.g. missing values) are left as
    they are. The function returns None.
    """
    def to_names(cell):
        if not isinstance(cell, str):
            return cell
        return name_separator.join(
            tw.stock_name(ticker) for ticker in cell.split(name_separator)
        )

    # Whole-column assignment keeps rows independent of each other even when
    # the index contains duplicate labels.
    df[ticker_column] = df[ticker_column].map(to_names)


def remove_ticker_suffix(df, ticker_column):
    """
    Strip the '.TWO' and '.TW' markers from a ticker column, in place.

    Parameters:
    - df: The DataFrame containing ticker codes.
    - ticker_column: The name of the column containing ticker codes.

    Every literal occurrence inside a cell is removed, so comma-joined ticker
    lists are handled as well. The function returns None.
    """
    cleaned = df[ticker_column]
    # '.TWO' has to go first: stripping '.TW' first would leave a stray 'O'.
    for marker in ('.TWO', '.TW'):
        cleaned = cleaned.str.replace(marker, '', regex=False)
    df[ticker_column] = cleaned


In [None]:
# @title Rank Function

import os
from datetime import datetime

from vistock import ibd
from vistock import tw
from vistock.stock_indices import get_tickers

def remove_failed_tickers(tickers):
    """Return `tickers` without the symbols on the hard-coded exclusion lists.

    Duplicates are dropped and the first-seen order of the input is kept, so
    the result is the same on every run.

    Args:
        tickers: Iterable of ticker symbols.

    Returns:
        list: The remaining symbols.
    """
    # The two lists keep the original author's grouping ("delisted" and
    # "invalid"); both are simply excluded.
    delisted = ['BRK.B', 'LEN.B', 'BF.B', 'UHAL.B', 'BF.A', 'CWEN.A', 'HEI.A']
    invalid = [
        'GEV', 'SOLV', 'VLTO', 'SW', 'ARM', 'CART', 'AS', 'BIRK', 'VSTS',
        'LOAR', 'ALAB', 'GRAL', 'SEG',
    ]
    invalid += [
        '00945B.TW', '6928.TW', '6914.TW', '6771.TW', '00944.TW', '8162.TW',
        '1563.TW', '00946.TW', '00941.TW', '6423.TW', '00940.TW', '00939.TW',
        '4949.TW', '00943.TW', '8487.TW', '6794.TW', '6949.TW', '4771.TW',
    ]
    invalid += [
        '00936.TW', '6805.TW', '2254.TW', '6658.TW', '00935.TW', '6592B.TW',
        '6526.TW', '6906.TW', '4736.TW', '00636K.TW', '6968.TWO', '4442.TWO',
        '6534.TW', '6901.TW', '00934.TW', '00657K.TW', '6472.TW', '2258.TW',
        '6916.TW', '2762.TW', '6933.TW',
    ]
    invalid += [
        '02001R.TW', '020031.TW', '020039.TW', '020016.TW', '02001L.TW',
        '020019.TW', '020028.TW', '020020.TW', '02001S.TW', '020018.TW',
        '020038.TW', '020034.TW', '020011.TW', '020030.TW', '020012.TW',
        '020036.TW', '020029.TW', '020000.TW', '020015.TW', '020037.TW',
    ]
    invalid += [
        '6890.TW', '00951.TW', '3150.TW', '6957.TW', '00947.TW', '00949.TW',
    ]
    invalid += ['6838.TW', '00953B.TW', '00956.TW', '00954.TW']

    excluded = set(delisted) | set(invalid)
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return [t for t in dict.fromkeys(tickers) if t not in excluded]

def rank(code, period='2y',  ticker_ref='^GSPC',
         rs_window='12mo',  out_dir='out'):
    """Rank stocks and industries, then save both tables as CSV files.

    The tickers for `code` come from ``get_tickers`` and are filtered with
    ``remove_failed_tickers``; the rankings come from ``ibd.rankings``. Each
    table is uploaded through the module-level ``github`` client and also
    written to `out_dir`.

    Args:
        code: Index or exchange code passed to ``get_tickers``.
        period: Historical data range passed to ``ibd.rankings``.
        ticker_ref: Benchmark ticker passed to ``ibd.rankings``.
        rs_window: Relative-strength window passed to ``ibd.rankings``.
        out_dir: Local directory for the CSV files (created if missing).

    Returns:
        tuple: ``(rank_stock, rank_indust)`` DataFrames. When either table is
        empty nothing is saved and the tables are returned as they are, so
        callers can always unpack two values.
    """
    # Imported locally so this cell does not depend on the cache-setup cell.
    import requests_cache

    tickers = remove_failed_tickers(get_tickers(code))

    rank_stock, rank_indust = ibd.rankings(tickers, period=period,
                                           ticker_ref=ticker_ref,
                                           rs_window=rs_window)
    if rank_stock.empty or rank_indust.empty:
        print("Not enough data to generate rankings.")
        return rank_stock, rank_indust

    # Replace ticker codes with stock names in the industry table.
    # NOTE(review): this runs for every market, not only Taiwan -- confirm
    # that tw.stock_name passes non-Taiwan tickers through unchanged.
    update_tickers_with_names(rank_indust, 'Tickers')

    rank_stock = add_name_column(rank_stock)

    # Remove the '.TW' or '.TWO' suffix
    remove_ticker_suffix(rank_stock, 'Ticker')
    remove_ticker_suffix(rank_indust, 'Tickers')

    # Save to CSV
    print("\n\n***")
    os.makedirs(out_dir, exist_ok=True)
    today = datetime.now().strftime('%Y%m%d')
    for kind, df in (('stocks', rank_stock), ('industries', rank_indust)):
        filename = f'{code}_{kind}_{period}_ibd{rs_window}_{today}.csv'
        # Bypass the requests cache for the upload: a cached lookup could
        # hand back an outdated blob SHA when the same file is re-uploaded.
        with requests_cache.disabled():
            github.upload_df_as_csv(filename, df)
        df.to_csv(os.path.join(out_dir, filename), index=False)
        print(f'Your "{filename}" is in the "{out_dir}" folder.')
    print("***\n")

    return rank_stock, rank_indust

### Glossary of Terms

source (The source of stocks to analyze):
- This could include stocks traded on exchanges or components of a specific index.
- Common abbreviation(s) for the exchange or market sector.  
  - For Taiwan Markets, possible values include:
    - `TWSE`: Taiwan Stock Exchange (台灣上市股票交易所)
    - `TPEX`: Taipei Exchange (上櫃交易所)
    - `ESB`: Emerging Stock Board (興櫃交易所)
  - Can also be combined with '+' (e.g., `TWSE+TPEX`, `TWSE+TPEX+ESB`)
  - For American Markets, possible values include:
    - `SPX`: S&P 500 (標普五百指數)
    - `DJIA`: Dow Jones Industrial Average (道瓊指數)
    - `NDX`: NASDAQ-100 (納斯達克一百指數)
    - `SOX`: PHLX Semiconductor Index (費半指數)
  - Multiple indices can be combined using '+' (e.g., `SPX+DJIA+NDX+SOX`)

period (Historical Data Time Range):
- The time range for which to fetch historical data.
- `2y` means 2 years
- `6mo` means 6 months

rs_window (Period for RS calculation):
- The period for Relative Strength calculation
- `3mo` means 3 months
- `12mo` means 12 months

RS (Relative Strength):
- Relative Strength (RS) is a metric used to evaluate the performance of a stock relative to a benchmark index.
  - A higher RS rating indicates that the stock has outperformed the index, while a lower RS rating suggests underperformance.
  - A value of 100 represents the performance of the benchmark index or market.
- The IBD RS calculates the performance of the last year, with the most recent quarter weighted double.
- The IBD 3-month RS calculates the performance of the last quarter.

### RS Rating and Ranking

In [None]:
source = "S&P 500" #@param ["S&P 500", "Dow Jones Industrial Average", "NASDAQ 100", "Russell 1000", "Russell 2000", "PHLX Semiconductor", "U.S. Listed Stocks"]
period = "1y" # @param ["1y","ytd","2y"]
rs_window = "3mo" # @param ["12mo", "3mo"]

# Map the form's display names to the codes expected by rank().
code_from_name = {
    'S&P 500': 'SPX',
    'Dow Jones Industrial Average': 'DJIA',
    'NASDAQ 100': 'NDX',
    'Russell 1000': 'RUI',
    'Russell 2000': 'RUT',
    'PHLX Semiconductor': 'SOX',
    'U.S. Listed Stocks': 'U.S.Listed',
}

# rank() may return None when there is not enough data, so check the result
# before unpacking it.
result = rank(code_from_name[source], period, rs_window=rs_window)
if result is not None:
    rank_stock, rank_indust = result
    for df in (rank_stock, rank_indust):
        display(data_table.DataTable(df, include_index=False, num_rows_per_page=10))

[*********************100%***********************]  498 of 498 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['AMTM']: YFInvalidPeriodError("%ticker%: Period '1y' is invalid, must be one of ['1d', '5d', '1mo', 'ytd', 'max']")


[**********************100%**********************]  497 of 497 info downloaded
('Connection broken: IncompleteRead(2997946 bytes read, 1808124 more expected)', IncompleteRead(2997946 bytes read, 1808124 more expected)): https://www.tpex.org.tw/openapi/v1/tpex_mainboard_daily_close_quotes


***
Your "SPX_stocks_1y_ibd3mo_20241006.csv" is in the "out" folder.
Your "SPX_industries_1y_ibd3mo_20241006.csv" is in the "out" folder.
***



Unnamed: 0,Ticker,Price,Sector,Industry,Relative Strength,1 Month Ago,3 Months Ago,6 Months Ago,Percentile,Percentile (1M),Percentile (3M),Percentile (6M)
0,PLTR,40.01,Technology,Software—Infrastructure,134.52,124.14,103.50,119.43,100.00,99.80,79.07,97.18
1,MHK,154.44,Consumer Cyclical,"Furnishings, Fixtures & Appliances",125.38,113.55,96.33,114.51,99.80,97.59,47.08,94.97
2,IRM,118.28,Real Estate,REIT - Specialty,124.70,122.24,106.52,105.13,99.60,99.60,86.72,75.45
3,MMM,135.27,Industrials,Conglomerates,124.59,118.44,111.07,92.74,99.40,99.20,94.16,23.54
4,FICO,1913.38,Technology,Software—Application,123.52,119.63,104.56,107.83,99.20,99.40,82.80,83.90
...,...,...,...,...,...,...,...,...,...,...,...,...
492,DLTR,70.90,Consumer Defensive,Discount Stores,71.73,77.79,82.25,100.26,1.01,1.61,3.42,56.34
493,DXCM,68.11,Healthcare,Medical Devices,68.24,70.71,92.99,108.14,0.80,0.40,30.58,84.71
494,SMCI,41.23,Technology,Computer Hardware,66.92,83.47,122.66,272.38,0.60,3.82,98.39,100.00
495,MRNA,60.20,Healthcare,Biotechnology,65.27,81.84,125.19,106.36,0.40,2.41,98.59,80.48


Unnamed: 0,Industry,Sector,Relative Strength,1 Month Ago,3 Months Ago,6 Months Ago,Tickers,Percentile,Percentile (1M),Percentile (3M),Percentile (6M)
0,"Furnishings, Fixtures & Appliances",Consumer Cyclical,125.38,113.55,96.33,114.51,MHK,100.00,99.21,49.61,95.28
1,Gold,Basic Materials,118.60,113.77,112.33,81.91,NEM,99.21,100.00,94.49,0.79
2,Residential Construction,Consumer Cyclical,114.37,104.84,96.29,107.92,"DHI,PHM,NVR,LEN",98.43,89.76,48.82,88.98
3,Insurance—Life,Financial Services,114.32,108.83,99.99,92.65,AFL,97.64,96.85,72.44,18.90
4,REIT—Industrial,Real Estate,113.57,106.41,94.25,93.96,PSA,96.85,92.91,35.43,25.98
...,...,...,...,...,...,...,...,...,...,...,...
122,Oil & Gas Refining & Marketing,Energy,90.20,90.12,96.55,108.24,"MPC,PSX,VLO",3.94,8.66,51.97,89.76
123,Oil & Gas E&P,Energy,89.40,90.53,98.40,91.24,"EOG,MRO,FANG,COP,HES,APA,EQT,DVN,CTRA,OXY",3.15,9.45,62.99,14.17
124,Chemicals,Basic Materials,89.39,86.06,93.06,100.40,"DOW,CE",2.36,2.36,25.20,54.72
125,Steel,Basic Materials,88.84,85.54,90.88,103.52,"STLD,NUE",1.57,1.57,13.39,74.02


### RS Rating and Ranking for Taiwan Stocks

In [None]:
source = "上市+上櫃" #@param ["上市", "上櫃", "上市+上櫃", "興櫃", "全部"]
period = "1y" # @param ["1y","ytd","2y"]
rs_window = "3mo" # @param ["12mo", "3mo"]

# Map the form's display names to the codes expected by rank().
code_from_name = {
    '上市': 'TWSE',
    '上櫃': 'TPEX',
    '上市+上櫃': 'TWSE+TPEX',
    '興櫃': 'ESB',
    '全部': 'TWSE+TPEX+ESB'
}

# rank() may return None when there is not enough data, so check the result
# before unpacking it.
result = rank(code_from_name[source], period,
              ticker_ref='^TWII', rs_window=rs_window)
if result is not None:
    tw_stocks, tw_industries = result
    for df in (tw_stocks, tw_industries):
        display(data_table.DataTable(df, include_index=False, num_rows_per_page=10))

### Remove files in GitHub Repository

In [None]:
# @title CSV Deleter
import re
import ipywidgets as widgets

# Names of the entries currently in the GitHub directory. The listing is
# fetched with the requests cache disabled, presumably so that recent
# uploads and deletions are reflected.
with requests_cache.disabled():
    all_filenames = github.list_filenames()

# Function to extract unique dates from filenames
def extract_dates(filenames):
    """Collect the distinct 8-digit stamps found in `filenames`.

    Only the first run of eight consecutive digits in each name is used;
    names without one are skipped.

    Returns:
        list: The unique stamps as strings, in descending order (newest
        first for YYYYMMDD stamps).
    """
    stamp = re.compile(r'\d{8}')
    hits = (stamp.search(name) for name in filenames)
    unique_stamps = {hit.group(0) for hit in hits if hit}
    return sorted(unique_stamps, reverse=True)

# Delete one file on GitHub and drop it from the local listing
def remove_file(filename):
    """Delete `filename` via the `github` client and forget it locally.

    Raises:
        ValueError: if `filename` is not in `all_filenames`.
    """
    print(f"Removing file: {filename}")
    # GitHub is contacted with the requests cache disabled.
    with requests_cache.disabled():
        github.remove_file(filename)
    # NOTE(review): the name is dropped from the local list whether or not
    # the remote deletion succeeded -- github.remove_file's result is not
    # checked here.
    all_filenames.remove(filename)

#-------------------------------------------------------------------------------

# Update file selector options based on selected date
def update_file_selector(change):
    """Show the files whose name contains the newly selected date.

    Args:
        change: Change notification mapping; only ``change['new']`` (the
            selected date string) is read.

    The file selector's options and row count are always refreshed. Its
    width is only adjusted when at least one file matches.
    """
    chosen_date = change['new']
    matching = [name for name in all_filenames if chosen_date in name]

    file_selector_widget.options = matching
    file_selector_widget.rows = len(file_selector_widget.options)
    if not matching:
        return

    # 10px width per character of the longest filename
    longest = max(map(len, matching))
    file_selector_widget.layout = widgets.Layout(width=f'{longest * 10}px')

# Function to delete selected files
def delete_files(button):
    """Button callback: delete every file selected in the file selector.

    Args:
        button: The clicked button (unused).
    """
    selected_files = file_selector_widget.value
    for file in selected_files:
        remove_file(file)
    update_widgets()    # Update widgets after deletion

# Update widgets to reflect current state
def update_widgets():
    """Re-sync the date and file selectors with `all_filenames`.

    The date list is rebuilt from the current filenames. If the selected
    date no longer appears in it, the selection moves to the date at the
    same position in the new list, or to the last date when that position
    no longer exists. With no dates left, the file selector is emptied and
    the date selector is left untouched.
    """
    # Refresh the date selector
    dates = extract_dates(all_filenames)
    selected_date = date_selector_widget.value
    if dates and selected_date not in dates:
        # Position of the vanished date in the old option list
        i = date_selector_widget.options.index(selected_date)
        if i > len(dates) - 1:
            selected_date = dates[-1]
        else:
            selected_date = dates[i]
    if dates:
        # Options are replaced first, then the selection is restored
        date_selector_widget.options = dates
        date_selector_widget.value = selected_date
    else:
        file_selector_widget.options = []
        return
    # Refresh the file list explicitly for the selected date
    update_file_selector({'new': date_selector_widget.value})

#-------------------------------------------------------------------------------

# Create a widget for selecting dates
def create_date_selector(dates):
    """Build the 'Date:' drop-down for `dates`.

    The first entry is preselected; with an empty list nothing is selected.
    """
    initial = dates[0] if dates else None
    selector = widgets.Dropdown(
        options=dates,
        value=initial,
        description='Date:',
        disabled=False,
    )
    return selector

# Create a widget for selecting files
def create_file_selector(filenames):
    """Build an empty multi-select list labelled 'Files'.

    `filenames` is accepted but not used; the widget starts with no options
    and no selection.
    """
    selector = widgets.SelectMultiple(
        options=[],
        value=[],
        description='Files',
        disabled=False,
    )
    return selector

# Create widgets: a date drop-down, a multi-select file list, and a button
# whose click handler deletes the selected files
dates = extract_dates(all_filenames)
date_selector_widget = create_date_selector(dates)
file_selector_widget = create_file_selector(all_filenames)
delete_button = widgets.Button(description="Delete Selected Files")
delete_button.on_click(delete_files)

# Initialize the file selector with the latest date
# (done before the observer below is registered)
update_widgets()

# Set up the observer to update file selector when date is changed
date_selector_widget.observe(update_file_selector, names='value')

# Display widgets
display(date_selector_widget)
display(file_selector_widget)
display(delete_button)
