<a href="https://colab.research.google.com/github/yorkjong/vistock/blob/feature%2Franking_utils/notebooks/ibd_rs_rating.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Stock Analysis and Ranking with IBD RS Rating, inspired by the Investor's Business Daily (IBD) methodology.

### Install and Setup

#### Install Packages

In [1]:
# Install vistock from its feature/ranking_utils branch, plus requests-cache,
# which a later cell uses to cache HTTP responses.
%pip install "git+https://github.com/yorkjong/vistock.git@feature/ranking_utils"
%pip install requests-cache

Collecting git+https://github.com/yorkjong/vistock.git@feature/ranking_utils
  Cloning https://github.com/yorkjong/vistock.git (to revision feature/ranking_utils) to /tmp/pip-req-build-k4y0lvk9
  Running command git clone --filter=blob:none --quiet https://github.com/yorkjong/vistock.git /tmp/pip-req-build-k4y0lvk9
  Running command git checkout -b feature/ranking_utils --track origin/feature/ranking_utils
  Switched to a new branch 'feature/ranking_utils'
  Branch 'feature/ranking_utils' set up to track remote branch 'feature/ranking_utils' from 'origin'.
  Resolved https://github.com/yorkjong/vistock.git to commit f0c33b814d2a838cf3fe277e376c3b3b27340227
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting mplfinance (from vistock==0.7.0)
  Downloading mplfinance-0.12.10b0-py3-none-any.whl.metadata (19 kB)
Downloading mplfinance-0.12.10b0-py3-none-any.whl (75 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.0/75.0 kB[0m [31m1.7 MB/s[0m eta [36m0:00:

#### Setup and Configuration

In [2]:
# @title Enable DataFrame Formatter
# Colab-only: switch on the data_table formatter for DataFrame output.
# `data_table` is also used by later cells to build DataTable views.
from google.colab import data_table
data_table.enable_dataframe_formatter()

In [3]:
# @title Enable Requests Cache
import requests_cache
# Install a global HTTP cache named 'ibd_cache'; expire_after=3600 is given in
# seconds (one hour). Code that must see fresh GitHub state wraps its calls in
# `requests_cache.disabled()`.
requests_cache.install_cache('ibd_cache', expire_after=3600)

In [4]:
# @title GitHub
import base64
import requests
import pandas as pd
from io import StringIO


class GitHub:
    """Small client for the GitHub REST "contents" API of one repository.

    Every path passed to a method is relative to the ``dir`` given at
    construction time, and every read and write targets ``branch``.

    Parameters:
    - repo_owner: Owner (user or organisation) of the repository.
    - repo_name: Name of the repository.
    - token: Access token sent in the ``Authorization`` header.
    - dir: Optional directory inside the repository that acts as the root for
      all file paths; leading and trailing slashes are ignored.
    - branch: Branch that is read from and committed to (default ``'main'``).
    """

    def __init__(self, repo_owner, repo_name, token, dir='', branch='main'):
        base = 'https://api.github.com/repos'
        dir = dir.strip('/')
        if dir:
            self.base_url = f'{base}/{repo_owner}/{repo_name}/contents/{dir}'
        else:
            self.base_url = f'{base}/{repo_owner}/{repo_name}/contents'
        self.branch = branch
        self.token = token

    def _headers(self):
        """Return the authentication/accept headers shared by all API calls."""
        return {
            'Authorization': f'token {self.token}',
            'Accept': 'application/vnd.github.v3+json',
        }

    def _url(self, path=''):
        """Return the contents-API URL for ``path`` (the root dir if empty)."""
        path = path.strip('/')
        return f'{self.base_url}/{path}' if path else self.base_url

    @staticmethod
    def _failure_message(response):
        """Describe a failed response without assuming its body is JSON."""
        try:
            detail = response.json()
        except ValueError:
            # Error pages from proxies/gateways are not always JSON.
            detail = response.text
        return f"Request failed: {response.status_code} - {detail}"

    def _request(self, method, url, headers=None, params=None, json=None):
        """Send a request and return its decoded JSON body.

        Returns:
        - The parsed JSON for a 200/201 response.
        - None for a 404 (silently) or any other status (after printing the
          failure).
        """
        response = requests.request(method, url, headers=headers,
                                    params=params, json=json)
        if response.status_code in [200, 201]:
            return response.json()
        if response.status_code != 404:
            print(self._failure_message(response))
        return None

    def file_exists(self, file_path):
        """Tell whether ``file_path`` exists on the configured branch.

        Returns:
        - True on a 200 response, False on a 404, and None (after printing the
          failure) for any other status.
        """
        response = requests.get(self._url(file_path), headers=self._headers(),
                                params={'ref': self.branch})
        if response.status_code == 200:
            return True
        if response.status_code == 404:
            return False
        print(self._failure_message(response))
        return None

    def list_filenames(self, dir_path=''):
        """List the entry names found under ``dir_path`` on the branch.

        Returns:
        - A list of names; an empty list if the directory does not exist or
          the request fails (a message is printed in both cases).
        """
        response = requests.get(self._url(dir_path), headers=self._headers(),
                                params={'ref': self.branch})
        if response.status_code == 200:
            entries = response.json()
            # A path that points at a single file yields one object, not a list.
            if isinstance(entries, dict):
                entries = [entries]
            return [item['name'] for item in entries]
        if response.status_code == 404:
            print(f"Directory '{dir_path}' does not exist.")
        else:
            print(self._failure_message(response))
        return []

    def download_file(self, file_path):
        """Download ``file_path`` and return its text wrapped in a StringIO.

        The file's metadata is requested first and the content is then fetched
        from the ``download_url`` it reports.

        Returns:
        - A ``StringIO`` with the file text, or None if the file is missing or
          the download fails (a message is printed).
        """
        file_info = self._request('GET', self._url(file_path),
                                  headers=self._headers(),
                                  params={'ref': self.branch})
        if not file_info:
            print(f"File '{file_path}' does not exist. Cannot download.")
            return None

        response = requests.get(file_info['download_url'])
        if response.status_code == 200:
            return StringIO(response.text)
        print(f"Failed to download file: "
              f"{response.status_code} - {response.text}")
        return None

    def download_csv(self, file_path):
        """Download a CSV file and parse it into a DataFrame.

        Returns:
        - The parsed DataFrame, or an empty DataFrame if the download failed.
        """
        file_content = self.download_file(file_path)
        if file_content is None:
            return pd.DataFrame()
        return pd.read_csv(file_content)

    def upload_file(self, file_path, content):
        """Create or update ``file_path`` on the branch with text ``content``.

        Parameters:
        - file_path: Destination path relative to the configured directory.
        - content: File content as a ``str``; it is base64-encoded for the API.
        """
        url = self._url(file_path)
        headers = self._headers()

        # The contents API expects the file body base64-encoded.
        payload = {
            'message': 'Uploading file',
            'content': base64.b64encode(content.encode()).decode(),
            'branch': self.branch
        }

        # Updating an existing file requires the SHA of the blob it replaces.
        file_info = self._request('GET', url, headers=headers,
                                  params={'ref': self.branch})
        if file_info:
            payload['sha'] = file_info.get('sha')

        # PUT request to create or update the file
        self._request('PUT', url, headers=headers, json=payload)

    def upload_df_as_csv(self, file_path, df):
        """Upload a DataFrame as a CSV file (``.csv`` is appended if missing).

        The index is not written to the CSV.
        """
        if not file_path.endswith('.csv'):
            file_path += '.csv'
        self.upload_file(file_path, df.to_csv(index=False))

    def remove_file(self, file_path):
        """Delete ``file_path`` from the branch, if it exists there.

        Prints a notice and does nothing when the file cannot be found.
        """
        url = self._url(file_path)
        headers = self._headers()

        # The SHA must come from the same branch the deletion commits to;
        # without `ref` the API would answer for the default branch instead.
        file_info = self._request('GET', url, headers=headers,
                                  params={'ref': self.branch})
        if not file_info:
            print(f"File '{file_path}' does not exist. Skipping deletion.")
            return

        payload = {
            'message': 'Deleting file',
            'sha': file_info['sha'],
            'branch': self.branch
        }
        self._request('DELETE', url, headers=headers, json=payload)

#-------------------------------------------------------------------------------

from google.colab import userdata

# Shared client for the CSV reports kept under data/ibd of the
# yorkjong/stock-reports repository; the token comes from Colab's secret store.
github = GitHub('yorkjong', 'stock-reports',
                userdata.get('GithubToken.stock-reports'), dir='data/ibd')

In [5]:
# @title Taiwan Stock Name Lookup

class StockNameLookup:
    """Class-level, lazily loaded lookup of Taiwan stock names by code.

    The stock list is downloaded once (on first use) from the
    ``data/stock_list`` directory of the stock-reports repository and kept in
    class attributes so that every later lookup is served from memory.
    """

    _df = None              # DataFrame holding the downloaded stock list
    _names = None           # dict built from `_df`: code (str) -> name
    _names_source = None    # the DataFrame `_names` was built from

    @classmethod
    def _load_data(cls):
        """Download the stock list unless it has already been loaded."""
        if cls._df is None:  # Check if the DataFrame is already loaded
            gh = GitHub(
                repo_owner='yorkjong',
                repo_name='stock-reports',
                token=userdata.get('GithubToken.stock-reports'),
                dir='data/stock_list',
            )
            cls._df = gh.download_csv('taiwan_stock_OpenAPI.csv')

    @classmethod
    def _name_map(cls):
        """Return the code -> name dict, rebuilding it if `_df` was replaced."""
        df = cls._df
        if cls._names is None or cls._names_source is not df:
            if df is None or not {'Code', 'Name'}.issubset(df.columns):
                # e.g. the download failed and left an empty DataFrame
                names = {}
            else:
                # Codes are compared as text: a CSV column holding only
                # numeric codes is parsed as integers and would otherwise
                # never equal the string taken from the ticker.
                codes = df['Code'].astype(str).tolist()
                # Walk backwards so the first row wins for duplicated codes.
                names = dict(zip(reversed(codes),
                                 reversed(df['Name'].tolist())))
            cls._names = names
            cls._names_source = df
        return cls._names

    @classmethod
    def tw_stock_name(cls, ticker):
        """Return the stock name for ``ticker``, or None if it is unknown.

        Parameters:
        - ticker: A ticker such as ``'2330.TW'``; only the part before the
          first ``'.'`` is used as the lookup code.
        """
        cls._load_data()  # Ensure data is loaded before accessing
        code = str(ticker).split('.')[0]  # Extract the code part
        return cls._name_map().get(code)


def tw_stock_name(ticker):
    """Module-level shortcut for ``StockNameLookup.tw_stock_name(ticker)``."""
    lookup = StockNameLookup.tw_stock_name
    return lookup(ticker)

In [6]:
# @title DataFrame Operations

def is_taiwan_stock_df(df):
    """Guess from the first ticker whether ``df`` lists Taiwan stocks.

    A ticker counts as Taiwanese when it carries a '.TW' / '.TWO' suffix or,
    with the suffix already removed, consists of digits only.

    Parameters:
    - df: A DataFrame with a 'Ticker' column.

    Returns:
    - True or False; False for a DataFrame without rows.
    """
    if df.empty:
        return False
    first_ticker = str(df['Ticker'].iloc[0])
    # Checking the suffix first keeps alphanumeric codes such as '00679B.TWO'
    # from being classified as non-Taiwan.
    if first_ticker.endswith(('.TW', '.TWO')):
        return True
    return first_ticker.isdigit()

def add_name_column(df):
    """Return ``df`` with a 'Name' column placed right after 'Ticker'.

    The names are looked up with `tw_stock_name`. The input is returned
    unchanged when it already has a 'Name' column, has no 'Ticker' column, is
    empty, or is not recognised as a Taiwan stock table by
    `is_taiwan_stock_df`.

    Parameters:
    - df: The DataFrame to extend; it is never modified.

    Returns:
    - ``df`` itself in the pass-through cases above, otherwise a new DataFrame
      that includes the 'Name' column.
    """
    column_names = df.columns.tolist()
    if 'Name' in column_names or 'Ticker' not in column_names:
        return df
    if df.empty or not is_taiwan_stock_df(df):
        return df

    # Work on a copy so the caller's frame does not silently gain a column,
    # and insert in one step to avoid assigning into a column-subset frame.
    named_df = df.copy()
    named_df.insert(column_names.index('Ticker') + 1, 'Name',
                    named_df['Ticker'].apply(tw_stock_name))
    return named_df


def remove_ticker_suffix(df, ticker_column):
    """
    Remove the '.TW' or '.TWO' suffix from ticker codes in a DataFrame.

    The column is rewritten in place on ``df``; nothing is returned.

    Parameters:
    - df: The DataFrame containing ticker codes.
    - ticker_column: The name of the column containing ticker codes.
    """
    # Anchor the pattern to the end of the string so that only a real suffix
    # is stripped, never a '.TW' that happens to occur inside a ticker.
    df[ticker_column] = df[ticker_column].str.replace(r'\.TWO?$', '',
                                                      regex=True)


In [7]:
# @title Rank Function

import os
from datetime import datetime

from vistock import ibd
from vistock.stock_indices import get_tickers
from vistock.ranking_utils import append_ratings

def remove_failed_tickers(tickers):
    """Drop tickers on the hard-coded exclusion lists below.

    Parameters:
    - tickers: An iterable of ticker symbols.

    Returns:
    - A list of the remaining tickers without duplicates, in the order of
      their first appearance in ``tickers``.
    """
    # NOTE(review): presumably symbols whose downloads failed in earlier runs
    # (judging by the function name) — confirm before pruning these lists.
    delisted = ['BRK.B', 'LEN.B', 'BF.B', 'UHAL.B', 'BF.A', 'CWEN.A', 'HEI.A']
    invalid = ['GEV', 'SOLV', 'VLTO', 'SW', 'ARM', 'CART', 'AS', 'BIRK', 'VSTS','LOAR', 'ALAB','GRAL', 'SEG']
    invalid += ['00945B.TW', '6928.TW', '6914.TW', '6771.TW', '00944.TW', '8162.TW', '1563.TW', '00946.TW', '00941.TW', '6423.TW', '00940.TW', '00939.TW', '4949.TW', '00943.TW', '8487.TW', '6794.TW', '6949.TW', '4771.TW']
    invalid += ['00936.TW', '6805.TW', '2254.TW', '6658.TW', '00935.TW', '6592B.TW', '6526.TW', '6906.TW', '4736.TW', '00636K.TW', '6968.TWO', '4442.TWO', '6534.TW', '6901.TW', '00934.TW', '00657K.TW', '6472.TW', '2258.TW', '6916.TW', '2762.TW', '6933.TW']
    invalid += ['02001R.TW', '020031.TW', '020039.TW', '020016.TW', '02001L.TW', '020019.TW', '020028.TW', '020020.TW', '02001S.TW', '020018.TW', '020038.TW', '020034.TW', '020011.TW', '020030.TW', '020012.TW', '020036.TW', '020029.TW', '020000.TW', '020015.TW', '020037.TW']
    invalid += ['6890.TW', '00951.TW', '3150.TW', '6957.TW', '00947.TW', '00949.TW']
    invalid += ['6838.TW', '00953B.TW', '00956.TW', '00954.TW']
    excluded = set(delisted) | set(invalid)
    # dict.fromkeys de-duplicates while keeping the input order, so repeated
    # runs process the tickers in the same sequence (a plain set would not).
    return [t for t in dict.fromkeys(tickers) if t not in excluded]

def rank(code, period='2y',  ticker_ref='^GSPC',
         rs_window='12mo',  out_dir='out'):
    """Build, save and return the RS ranking table for a stock source.

    Parameters:
    - code: Source code passed to `get_tickers` (e.g. 'SPX', 'TWSE+TPEX').
    - period: Historical data range forwarded to `ibd.build_stock_rs_df`.
    - ticker_ref: Ticker of the benchmark index.
    - rs_window: Window for the RS calculation ('3mo' or '12mo').
    - out_dir: Local directory that receives the CSV file.

    Returns:
    - The ranking DataFrame sorted by 'RS' in descending order, or None when
      there is not enough data to rank.

    Side effects:
    - Writes the table as CSV into ``out_dir`` and uploads the same CSV
      through the module-level `github` client.
    """
    tickers = get_tickers(code)
    tickers = remove_failed_tickers(tickers)

    stock_df = ibd.build_stock_rs_df(tickers, period=period,
                                     ticker_ref=ticker_ref, rs_window=rs_window)
    stock_df = stock_df.sort_values(by='RS', ascending=False)

    rs_columns = ['RS', '1 Month Ago', '3 Months Ago', '6 Months Ago']
    rating_columns = ['Rating (RS)',
                      'Rating (1M)', 'Rating (3M)', 'Rating (6M)']
    stock_df = append_ratings(stock_df, rs_columns, rating_columns,
                              method='rank')
    if stock_df.empty:
        print("Not enough data to generate rankings.")
        return

    # Add the stock names (Taiwan tables only; others pass through unchanged)
    stock_df = add_name_column(stock_df)

    # Remove the '.TW' or '.TWO' suffix
    remove_ticker_suffix(stock_df, 'Ticker')

    # Save to CSV
    print("\n\n***")
    os.makedirs(out_dir, exist_ok=True)
    today = datetime.now().strftime('%Y%m%d')
    filename = f'{code}_stocks_{period}_ibd{rs_window}_{today}.csv'
    # Write the local copy first so that a network problem during the upload
    # cannot cost the computed result.
    stock_df.to_csv(os.path.join(out_dir, filename), index=False)
    # Bypass the global requests cache: the upload first reads the file's
    # current SHA, and a cached (stale) SHA makes GitHub reject the update.
    with requests_cache.disabled():
        github.upload_df_as_csv(filename, stock_df)
    print(f'Your "{filename}" is in the "{out_dir}" folder.')
    print("***\n")

    return stock_df

### Glossary of Terms

#### Source (The source of stocks to analyze)
- This could include stocks traded on exchanges or components of a specific index.
- Common abbreviation(s) for the exchange or market sector:
  - For Taiwan Markets, possible values include:
    - **`TWSE`**: Taiwan Stock Exchange (台灣上市股票交易所)
    - **`TPEX`**: Taipei Exchange (上櫃交易所)
    - **`ESB`**: Emerging Stock Board (興櫃交易所)
  - Can also be combined with `+` (e.g., `TWSE+TPEX`, `TWSE+TPEX+ESB`).
  - For U.S. markets, possible values include:
    - **`SPX`**: S&P 500 (標普五百指數)
    - **`DJIA`**: Dow Jones Industrial Average (道瓊指數)
    - **`NDX`**: NASDAQ-100 (納斯達克一百指數)
    - **`SOX`**: PHLX Semiconductor Index (費半指數)
    - **`RUI`**: Russell 1000 (羅素1000指數)
    - **`RUT`**: Russell 2000 (羅素2000指數)
    - **`U.S.Listed`**: All U.S. listed stocks (美國上市股票)
  - Multiple indices can be combined using `+` (e.g., `SPX+DJIA+NDX+SOX`).

#### Period (Historical Data Time Range)
- The time range for which to fetch historical data, for example:
  - **`2y`**: 2 years
  - **`1y`**: 1 year
  - **`ytd`**: year to date
  - **`6mo`**: 6 months

#### RS Window (Window for RS Calculation)
- The time window ('3mo' or '12mo') for Relative Strength:
  - **`3mo`**: 3 months
  - **`12mo`**: 12 months

#### RS (Relative Strength)
- Relative Strength (RS) is a metric used to evaluate the performance of a stock relative to a benchmark index.
  - A higher RS value indicates that the stock has outperformed the index, while a lower RS value suggests underperformance.
  - A value of 100 represents the performance of the benchmark index or market.
- The IBD RS calculates the performance over the last year, with the most recent quarter weighted double.
- The IBD 3-month RS calculates the performance over the last quarter.

#### RS Rating
- RS Rating, ranging from 1 (worst) to 99 (best), evaluates the price performance of a stock relative to a benchmark index.

### RS Rating and Ranking

In [8]:
source = "U.S. Listed Stocks" #@param ["S&P 500", "Dow Jones Industrial Average", "NASDAQ 100", "Russell 1000", "Russell 2000", "PHLX Semiconductor", "U.S. Listed Stocks"]
period = "1y" # @param ["1y","ytd","2y"]
rs_window = "3mo" # @param ["12mo", "3mo"]

# Map the form's display name to the source code expected by rank().
code_from_name = {
    'S&P 500': 'SPX',
    'Dow Jones Industrial Average': 'DJIA',
    'NASDAQ 100': 'NDX',
    'Russell 1000': 'RUI',
    'Russell 2000': 'RUT',
    'PHLX Semiconductor': 'SOX',
    'U.S. Listed Stocks': 'U.S.Listed',
}

df = rank(code_from_name[source], period, rs_window=rs_window)
# rank() returns None when there is not enough data; only render a table
# when there is one.
if df is not None:
    display(data_table.DataTable(df, include_index=False, num_rows_per_page=20))

[*********************100%***********************]  5430 of 5430 completed
ERROR:yfinance:
149 Failed downloads:
ERROR:yfinance:['PCSC', 'BKHA', 'MACIU', 'SMTK', 'EHGO', 'RAPP', 'YYGH', 'HAFN', 'INBX', 'IBAC', 'MFI', 'TWG', 'LIF', 'SPMC', 'CTRI', 'DYCQ', 'RECT', 'RFAI', 'PACS', 'BTOC', 'NNE', 'ALMS', 'ZK', 'ABVE', 'RFAIU', 'RAY', 'VIK', 'JDZG', 'GAUZ', 'KDLY', 'CTNM', 'CDTG', 'TBN', 'SHMD', 'NCI', 'MRX', 'ALFUU', 'WAY', 'BOW', 'LB', 'GPATU', 'SILA', 'FSHPU', 'HDL', 'RBRK', 'MNDR', 'JUNE', 'LSH', 'TEM', 'FLYE', 'CCIXU', 'WBTN', 'SUUN', 'EPRX', 'ULS', 'EURKU', 'CUBWU', 'PAL', 'TRSG', 'CCIX', 'ZOOZ', 'GPAT', 'SVCO', 'IBTA', 'KBDC', 'MTEN']: YFInvalidPeriodError("%ticker%: Period '1y' is invalid, must be one of ['1d', '5d', '1mo', '3mo', '6mo', 'ytd', 'max']")
ERROR:yfinance:['XCH', 'TDTH', 'MBX', 'YHNAU', 'HTLM', 'VCICU', 'EURK', 'ZJK', 'BIOA', 'MBAV', 'IBG', 'PC', 'CAPNU', 'GLXG', 'FVR', 'BKV', 'AMTM', 'EQV', 'DTSQ', 'CURB', 'GLE', 'BCAX', 'SARO', 'AAM', 'ZBIO', 'WCT', 'GRDN', 'VACH', 'Z

[**********************100%**********************]  5429 of 5429 info downloaded


***
Your "U.S.Listed_stocks_1y_ibd3mo_20241008.csv" is in the "out" folder.
***



Unnamed: 0,Ticker,Price,Sector,Industry,RS,1 Month Ago,3 Months Ago,6 Months Ago,Rating (RS),Rating (1M),Rating (3M),Rating (6M)
3221,TECX,32.41,Healthcare,Biotechnology,31313.66,138151.54,811.35,85.92,99,99,99,20
5164,MIRA,1.12,Healthcare,Drug Manufacturers—General,6354.81,2852.07,79.61,82.51,99,99,14,15
1931,ASNS,1.40,Technology,Communication Equipment,1363.10,12598.44,1076.41,90.93,99,99,99,35
3364,ZAPP,2.65,Consumer Cyclical,Auto Manufacturers,916.24,1680.05,79.20,93.90,99,99,13,46
3942,RGC,4.40,Healthcare,Drug Manufacturers—Specialty & Generic,893.41,554.42,87.64,48.87,99,99,26,3
...,...,...,...,...,...,...,...,...,...,...,...,...
348,BJDX,0.13,Healthcare,Medical Devices,14.28,16.51,46.51,34.76,1,1,2,2
1104,CDT,0.10,Healthcare,Biotechnology,12.58,12.41,73.36,151.67,1,1,9,94
4676,SYTA,0.88,Technology,Communication Equipment,9.96,14.17,57.63,72.15,1,1,4,7
4568,CNSP,0.11,Healthcare,Biotechnology,9.75,7.96,30.67,33.55,1,1,1,2


### RS Rating and Ranking for Taiwan Stocks

In [9]:
source = "上市+上櫃" #@param ["上市", "上櫃", "上市+上櫃", "興櫃", "全部"]
period = "1y" # @param ["1y","ytd","2y"]
rs_window = "3mo" # @param ["12mo", "3mo"]

# Map the form's display name to the source code expected by rank().
code_from_name = {
    '上市': 'TWSE',
    '上櫃': 'TPEX',
    '上市+上櫃': 'TWSE+TPEX',
    '興櫃': 'ESB',
    '全部': 'TWSE+TPEX+ESB'
}

# Taiwan stocks are measured against the ^TWII benchmark ticker.
df = rank(code_from_name[source], period, ticker_ref='^TWII',
          rs_window=rs_window)
# rank() returns None when there is not enough data; only render a table
# when there is one.
if df is not None:
    display(data_table.DataTable(df, include_index=False, num_rows_per_page=20))

[*********************100%***********************]  2022 of 2022 completed
ERROR:yfinance:
7 Failed downloads:
ERROR:yfinance:['00952.TW', '3716.TW']: YFInvalidPeriodError("%ticker%: Period '1y' is invalid, must be one of ['1d', '5d', '1mo', '3mo', 'ytd', 'max']")
ERROR:yfinance:['2897B.TW', '6923.TW', '6969.TW', '6919.TW', '00960.TW']: YFInvalidPeriodError("%ticker%: Period '1y' is invalid, must be one of ['1d', '5d', '1mo', 'ytd', 'max']")


[**********************100%**********************]  2021 of 2021 info downloaded


***
Your "TWSE+TPEX_stocks_1y_ibd3mo_20241008.csv" is in the "out" folder.
***



Unnamed: 0,Ticker,Name,Price,Sector,Industry,RS,1 Month Ago,3 Months Ago,6 Months Ago,Rating (RS),Rating (1M),Rating (3M),Rating (6M)
1076,6144,得利影,114.00,Communication Services,Entertainment,327.34,318.63,169.01,101.06,99,99,98,74
164,3230,錦明,57.50,Technology,Electronic Components,271.30,202.71,126.99,103.05,99,98,94,78
8,4510,高鋒,42.10,Industrials,Specialty Industrial Machinery,239.07,190.35,94.26,90.49,99,98,57,34
189,8374,羅昇,136.00,Industrials,Industrial Distribution,238.98,300.20,175.20,91.93,99,99,98,43
125,8937,合騏,80.70,Consumer Cyclical,Recreational Vehicles,230.92,176.43,94.22,89.04,99,98,57,25
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1079,2329,華泰,38.70,Technology,Semiconductors,71.32,72.65,85.26,116.47,1,1,18,91
1623,3228,金麗科,230.50,Technology,Semiconductors,70.24,76.80,90.55,209.35,1,2,44,99
1440,8088,品安,26.55,Technology,Computer Hardware,68.61,73.87,89.63,97.32,1,1,39,63
172,8085,福華,40.00,Technology,Electronic Components,68.24,76.39,101.70,139.03,1,2,74,97


### Remove files in GitHub Repository

In [12]:
# @title CSV Deleter
import re
import ipywidgets as widgets

# Fetch the current file listing from the GitHub data directory, bypassing the
# requests cache so the list reflects the repository's present state.
with requests_cache.disabled():
    all_filenames = github.list_filenames()

# Function to extract unique dates from filenames
def extract_dates(filenames):
    """Return the distinct 8-digit date stamps found in ``filenames``.

    Only the first run of eight digits in each name is considered; names
    without one are ignored. The result is sorted from newest to oldest.
    """
    matches = (re.search(r'\d{8}', name) for name in filenames)
    unique_dates = {m.group(0) for m in matches if m}
    return sorted(unique_dates, reverse=True)

# Delete a file from the GitHub repository and drop it from the local listing
def remove_file(filename):
    """Delete ``filename`` through the `github` client and forget it locally.

    The GitHub call runs with the requests cache disabled. Afterwards the name
    is removed from the module-level ``all_filenames`` list.
    NOTE(review): the list entry is dropped regardless of what the GitHub call
    did, because its result is not inspected here.
    """
    print(f"Removing file: {filename}")
    with requests_cache.disabled():
        github.remove_file(filename)
    all_filenames.remove(filename)

#-------------------------------------------------------------------------------

# Update file selector options based on selected date
def update_file_selector(change):
    """Show the files whose name contains the newly selected date.

    Parameters:
    - change: A change dict as delivered by a widget observer; only its
      ``'new'`` entry (the selected date string) is read.
    """
    chosen_date = change['new']
    matching = [name for name in all_filenames if chosen_date in name]

    file_selector_widget.options = matching
    file_selector_widget.rows = len(file_selector_widget.options)
    if not matching:
        return

    # Size the list box to the longest name: 10px width per character
    longest = max(map(len, matching))
    file_selector_widget.layout = widgets.Layout(width=f'{longest * 10}px')

# Function to delete selected files
def delete_files(button):
    """Button callback: delete every file currently selected in the list.

    Parameters:
    - button: The clicked button (unused).
    """
    for name in file_selector_widget.value:
        remove_file(name)
    # Update widgets after deletion
    update_widgets()

# Update widgets to reflect current state
def update_widgets():
    """Bring the date and file selectors in line with ``all_filenames``.

    With no dated files left, the file list is emptied and nothing else is
    touched. Otherwise the date options are refreshed; if the previously
    selected date has disappeared, the date at the same position (or the last
    one, if that position no longer exists) is selected instead.
    """
    dates = extract_dates(all_filenames)
    if not dates:
        file_selector_widget.options = []
        return

    chosen = date_selector_widget.value
    if chosen not in dates:
        # Stay at the old position in the list, clamped to the new last entry.
        position = date_selector_widget.options.index(chosen)
        chosen = dates[min(position, len(dates) - 1)]

    date_selector_widget.options = dates
    date_selector_widget.value = chosen
    update_file_selector({'new': date_selector_widget.value})

#-------------------------------------------------------------------------------

# Create a widget for selecting dates
def create_date_selector(dates):
    """Build the date dropdown, preselecting the first entry of ``dates``.

    With an empty ``dates`` the dropdown starts without a selection.
    """
    initial = dates[0] if dates else None
    return widgets.Dropdown(options=dates, value=initial,
                            description='Date:', disabled=False)

# Create a widget for selecting files
def create_file_selector(filenames):
    """Build an initially empty multi-select list for file names.

    Parameters:
    - filenames: Not used here; the options are filled in later by
      `update_file_selector`.
    """
    selector_settings = dict(options=[], value=[],
                             description='Files', disabled=False)
    return widgets.SelectMultiple(**selector_settings)

# Create widgets
# Build the three controls: date dropdown, file list and delete button
dates = extract_dates(all_filenames)
date_selector_widget = create_date_selector(dates)
file_selector_widget = create_file_selector(all_filenames)
delete_button = widgets.Button(description="Delete Selected Files")
delete_button.on_click(delete_files)

# Fill the file list for the latest date once, then keep it in sync with the
# dropdown through an observer on its value
update_widgets()
date_selector_widget.observe(update_file_selector, names='value')

# Show the controls in order
display(date_selector_widget, file_selector_widget, delete_button)


Dropdown(description='Date:', options=('20241008',), value='20241008')

SelectMultiple(description='Files', layout=Layout(width='400px'), options=('TWSE+TPEX_stocks_1y_ibd3mo_2024100…

Button(description='Delete Selected Files', style=ButtonStyle())