<a href="https://colab.research.google.com/github/yorkjong/vistock/blob/feature%2Fibd/notebooks/ibd_rs_rating.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Stock Analysis and Ranking with IBD RS Rating, inspired by the Investor's Business Daily (IBD) methodology.

### Install and Setup (免費版Colab會固定時間清掉安裝的東西，所以重安裝是新連線後最先要做的事)

#### Install Packages

In [31]:
%pip install "git+https://github.com/yorkjong/vistock.git@feature/ibd"
%pip install requests-cache

Collecting git+https://github.com/yorkjong/vistock.git@feature/ibd
  Cloning https://github.com/yorkjong/vistock.git (to revision feature/ibd) to /tmp/pip-req-build-ovdjnh2c
  Running command git clone --filter=blob:none --quiet https://github.com/yorkjong/vistock.git /tmp/pip-req-build-ovdjnh2c
  Running command git checkout -b feature/ibd --track origin/feature/ibd
  Switched to a new branch 'feature/ibd'
  Branch 'feature/ibd' set up to track remote branch 'feature/ibd' from 'origin'.
  Resolved https://github.com/yorkjong/vistock.git to commit 0f3a9fbf159542d0392cbbde079d7e812a3fc237
  Preparing metadata (setup.py) ... [?25l[?25hdone


#### Setup and Configuration

In [32]:
# @title Enable DataFrame Formatter
# Turn on Colab's DataFrame formatter so that pandas DataFrames shown with
# display() are rendered through Colab's data-table view.
from google.colab import data_table
data_table.enable_dataframe_formatter()

In [33]:
# @title Enable Requests Cache
# Install a global cache named 'ibd_cache' for HTTP calls made through
# `requests`; expire_after=3600 limits the lifetime of cached responses
# (seconds, i.e. one hour).
import requests_cache
requests_cache.install_cache('ibd_cache', expire_after=3600)

In [34]:
# @title Initialize Widgets
# Shared Output widget: rank() runs ibd.rankings() inside `with output:`, so
# whatever that call emits is captured here and shown where display(output)
# is called.
import ipywidgets as widgets
output = widgets.Output()

In [35]:
# @title GitHub
import base64
import requests
import pandas as pd
from io import StringIO


class GitHub:
    """Small client for the GitHub REST "contents" API of one repository.

    Every path passed to the methods is relative to the directory chosen at
    construction time, and every operation targets `self.branch`.
    Failures are reported with `print` instead of being raised.
    """

    def __init__(self, repo_owner, repo_name, token, dir='', branch='main'):
        """Remember the target repository, directory, branch and token.

        Parameters:
        - repo_owner: Owner (user or organization) of the repository.
        - repo_name: Name of the repository.
        - token: Access token sent in the `Authorization` header.
        - dir: Directory inside the repository that all paths are relative
          to; leading/trailing slashes are ignored. (The name shadows the
          builtin but is kept because callers pass it by keyword.)
        - branch: Branch that is read from and committed to.
        """
        base = 'https://api.github.com/repos'
        dir = dir.strip('/')
        if dir:
            self.base_url = f'{base}/{repo_owner}/{repo_name}/contents/{dir}'
        else:
            self.base_url = f'{base}/{repo_owner}/{repo_name}/contents'
        self.branch = branch
        self.token = token

    def _headers(self):
        """Return the HTTP headers shared by every API call."""
        return {
            'Authorization': f'token {self.token}',
            'Accept': 'application/vnd.github.v3+json',
        }

    def _url(self, path=''):
        """Return the API URL for `path`; an empty path maps to the base
        directory itself (without a trailing slash)."""
        path = path.strip('/')
        return f'{self.base_url}/{path}' if path else self.base_url

    @staticmethod
    def _print_failure(response):
        """Print the status code and body of a failed response."""
        try:
            detail = response.json()
        except ValueError:
            # The error body is not JSON (e.g. an HTML error page), so fall
            # back to the raw text instead of crashing while reporting.
            detail = response.text
        print(f"Request failed: {response.status_code} - {detail}")

    def _request(self, method, url, headers=None, params=None, json=None):
        """Send a request and return the decoded JSON body.

        Returns the parsed JSON for status 200/201; otherwise prints the
        failure and returns None.
        """
        response = requests.request(method, url, headers=headers,
                                    params=params, json=json)
        if response.status_code in [200, 201]:
            return response.json()
        self._print_failure(response)
        return None

    def _get_sha(self, file_path):
        """Return the blob SHA of `file_path` on `self.branch`.

        Returns None when the file does not exist (404) or the lookup
        fails; only non-404 failures are printed.
        """
        # NOTE(review): if an HTTP cache such as requests_cache is installed,
        # this GET may be answered from the cache -- confirm whether callers
        # need to disable caching around uploads.
        response = requests.get(self._url(file_path), headers=self._headers(),
                                params={'ref': self.branch})
        if response.status_code == 200:
            info = response.json()
            return info.get('sha') if isinstance(info, dict) else None
        if response.status_code != 404:
            self._print_failure(response)
        return None

    def file_exists(self, file_path):
        """Check whether `file_path` exists on `self.branch`.

        Returns True for status 200, False for 404, and None (after
        printing the failure) for any other status.
        """
        response = requests.get(self._url(file_path), headers=self._headers(),
                                params={'ref': self.branch})
        if response.status_code == 200:
            return True
        if response.status_code == 404:
            return False
        self._print_failure(response)
        return None

    def list_filenames(self, dir_path=''):
        """Return the entry names found under `dir_path`.

        Returns an empty list (after printing the failure) when the request
        does not succeed.
        """
        response = requests.get(self._url(dir_path), headers=self._headers(),
                                params={'ref': self.branch})
        if response.status_code != 200:
            self._print_failure(response)
            return []
        entries = response.json()
        if isinstance(entries, dict):
            # The path names a single file, for which the API returns one
            # object rather than a list of entries.
            entries = [entries]
        return [item['name'] for item in entries]

    def download_file(self, file_path):
        """Download `file_path` and return its text wrapped in a StringIO.

        Returns None when the file is missing or any request fails.
        """
        if not self.file_exists(file_path):
            print(f"File '{file_path}' does not exist. Cannot download.")
            return None

        # Pass `ref` so the metadata comes from the same branch that
        # file_exists() just checked, not from the default branch.
        file_info = self._request('GET', self._url(file_path),
                                  headers=self._headers(),
                                  params={'ref': self.branch})
        if not file_info:
            return None

        response = requests.get(file_info['download_url'])
        if response.status_code == 200:
            return StringIO(response.text)
        print(f"Failed to download file: "
              f"{response.status_code} - {response.text}")
        return None

    def download_csv(self, file_path):
        """Download a CSV file and return it as a DataFrame.

        Returns an empty DataFrame when the download fails.
        """
        file_content = self.download_file(file_path)
        if file_content is None:
            return pd.DataFrame()
        return pd.read_csv(file_content)

    def upload_file(self, file_path, content):
        """Create `file_path` with the text `content`, or overwrite it.

        The content is base64-encoded as the API requires. When the file
        already exists, its current SHA is sent along, because the API
        rejects an update that does not name the blob being replaced.
        """
        encoded_content = base64.b64encode(content.encode()).decode()
        payload = {
            'message': 'Uploading file',
            'content': encoded_content,
            'branch': self.branch
        }
        sha = self._get_sha(file_path)
        if sha is not None:
            payload['sha'] = sha
        self._request('PUT', self._url(file_path), headers=self._headers(),
                      json=payload)

    def upload_df_as_csv(self, file_path, df):
        """Upload a DataFrame to a CSV file.

        '.csv' is appended to `file_path` when missing; the index is not
        written.
        """
        if not file_path.endswith('.csv'):
            file_path += '.csv'
        csv_content = df.to_csv(index=False)
        self.upload_file(file_path, csv_content)

    def remove_file(self, file_path):
        """Delete `file_path` from `self.branch`; skip it when missing."""
        if not self.file_exists(file_path):
            print(f"File '{file_path}' does not exist. Skipping deletion.")
            return

        url = self._url(file_path)
        headers = self._headers()

        # Fetch the file info to get the SHA needed for deletion; `ref`
        # makes sure the SHA belongs to the branch being modified.
        file_info = self._request('GET', url, headers=headers,
                                  params={'ref': self.branch})
        if file_info:
            payload = {
                'message': 'Deleting file',
                'sha': file_info['sha'],
                'branch': self.branch
            }
            self._request('DELETE', url, headers=headers, json=payload)

#-------------------------------------------------------------------------------

from google.colab import userdata

# Client for the 'ibd' directory of the YorkJong/stock-reports repository
# (default branch argument of GitHub, i.e. 'main'). The access token is read
# through google.colab.userdata under the key 'GithubToken.stock-reports'
# instead of being written into the notebook.
github = GitHub(
    repo_owner='YorkJong',
    repo_name='stock-reports',
    token=userdata.get('GithubToken.stock-reports'),
    dir='ibd',
)

In [36]:
# @title Update and Filter DataFrame

def update_tickers_with_names(df, ticker_column, name_separator=','):
    """
    Replace the ticker codes in one DataFrame column with stock names.

    Each cell is split on `name_separator`, every piece is translated with
    `tw.stock_name`, and the names are joined back with the same separator.
    The DataFrame is modified in place and nothing is returned.

    Parameters:
    - df: The DataFrame containing ticker codes.
    - ticker_column: The name of the column containing ticker codes.
    - name_separator: Separator between the codes of one cell (default is a
      comma), also used to join the resulting names.
    """
    for label, record in df.iterrows():
        codes = record[ticker_column].split(name_separator)
        # Write the translated cell back through its index label.
        df.at[label, ticker_column] = name_separator.join(
            map(tw.stock_name, codes))


def remove_ticker_suffix(df, ticker_column):
    """
    Strip the '.TWO' and '.TW' markers from the ticker codes of a column.

    Every literal occurrence inside a cell is removed, so cells holding
    several comma-separated tickers are handled as well. The column is
    replaced in place and nothing is returned.

    Parameters:
    - df: The DataFrame containing ticker codes.
    - ticker_column: The name of the column containing ticker codes.
    """
    # '.TWO' goes first: removing '.TW' first would leave a stray 'O' behind.
    for marker in ('.TWO', '.TW'):
        cleaned = df[ticker_column].str.replace(marker, '', regex=False)
        df[ticker_column] = cleaned


def filter_increasing_relative_strength(df):
    """
    Filter stocks with increasing Relative Strength over different time periods.

    A row is kept only when:
    - its Relative Strength is above 100, and
    - Relative Strength > "1 Month Ago" > "3 Months Ago" > "6 Months Ago",
      i.e. the value has risen at every look-back point.

    Parameters:
    - df: DataFrame with the columns "Relative Strength", "1 Month Ago",
      "3 Months Ago" and "6 Months Ago".

    Returns:
    - A new DataFrame holding the matching rows. It is an explicit copy, so
      callers may modify it in place (for example to replace ticker codes
      with names) without touching `df` and without pandas warning about
      writing to a slice.
    """
    current = df["Relative Strength"]
    one_month = df["1 Month Ago"]
    three_months = df["3 Months Ago"]
    six_months = df["6 Months Ago"]

    rising = (
        (current > 100)
        & (current > one_month)
        & (one_month > three_months)
        & (three_months > six_months)
    )
    # Optional extra condition: also require df["Percentile"] > 90.
    return df[rising].copy()


In [37]:
# @title Rank Function

import os
from datetime import datetime

from vistock import ibd
from vistock import tw
from vistock.stock_indices import get_tickers

def rank(code, period='2y', tickers_getter=get_tickers,
         ref_ticker='^GSPC', out_dir='out'):
    """
    Rank the stocks and industries of a source and save the tables as CSV.

    The rankings come from `ibd.rankings`; its output is captured by the
    shared `output` widget. Each table is uploaded through `github` and
    written to `out_dir` as '{code}_{kind}_{period}_{YYYYMMDD}.csv'.

    Parameters:
    - code: Source code(s) handed to `tickers_getter`; also used in the
      CSV filenames.
    - period: Historical data range passed to `ibd.rankings`.
    - tickers_getter: Callable that turns `code` into the tickers to rank.
    - ref_ticker: Reference ticker passed to `ibd.rankings`.
    - out_dir: Local folder for the CSV files (created when missing).

    Returns:
    - The tuple (rank_stock, rank_indust). When either table is empty,
      nothing is saved and the tables are returned as they are, so callers
      that unpack the result keep working.
    """
    tickers = tickers_getter(code)

    output.clear_output()
    with output:
        rank_stock, rank_indust = ibd.rankings(tickers, period=period,
                                               ref_ticker=ref_ticker)
    if rank_stock.empty or rank_indust.empty:
        print("Not enough data to generate rankings.")
        # Returning None here would make `a, b = rank(...)` fail with a
        # TypeError in the calling cell.
        return rank_stock, rank_indust

    # Remove the '.TW' or '.TWO' suffix
    remove_ticker_suffix(rank_stock, 'Ticker')
    remove_ticker_suffix(rank_indust, 'Tickers')

    # Save to CSV
    print("\n\n***")
    os.makedirs(out_dir, exist_ok=True)
    today = datetime.now().strftime('%Y%m%d')
    for df, kind in zip([rank_stock, rank_indust],
                        ['stocks', 'industries']):
        filename = f'{code}_{kind}_{period}_{today}.csv'
        github.upload_df_as_csv(filename, df)
        df.to_csv(os.path.join(out_dir, filename), index=False)
        print(f'Your "{filename}" is in the "{out_dir}" folder.')
    print("***\n")

    return rank_stock, rank_indust

### Glossary of Terms

source (The source of stocks to analyze):
- This could include stocks traded on exchanges or components of a specific index.
- Common abbreviation(s) for the exchange or market sector.  
  - For Taiwan markets, possible values include:
    - `TWSE`: Taiwan Stock Exchange (台灣上市股票交易所)
    - `TPEX`: Taipei Exchange (上櫃交易所)
    - `ESB`: Emerging Stock Board (興櫃交易所)
  - Multiple sources can be combined using '+' (e.g., `TWSE+TPEX`, `TWSE+TPEX+ESB`)
  - For US markets, possible values include:
    - `SPX`: S&P 500 (標普五百指數)
    - `DJIA`: Dow Jones Industrial Average (道瓊指數)
    - `NDX`: NASDAQ-100 (納斯達克一百指數)
    - `SOX`: PHLX Semiconductor Index (費半指數)
  - Multiple indices can be combined using '+' (e.g., `SPX+DJIA+NDX+SOX`)

period (Historical Data Time Range):
- The time range for which to fetch historical data.
- `2y` means 2 years
- `6mo` means 6 months

RS (Relative Strength)
- Relative Strength (RS) is a metric used to evaluate the performance of a stock relative to a benchmark index.
  - A higher RS rating indicates that the stock has outperformed the index, while a lower RS rating suggests underperformance.
- The IBD RS is calculated from the stock's performance over the last year, with the most recent quarter weighted double.

min_percentile (最小百分位)
- The minimum percentile for a stock to be included in the rankings.

### Error Messages

In [38]:
# Show the shared Output widget; rank() captures what ibd.rankings() emits
# into it, so messages from that call appear below this cell.
display(output)

Output()

### RS Rating and Ranking

In [40]:
# Form inputs: which index (or all of them) to rank, and the history range.
source = "All Indices" #@param ["S&P 500", "Dow Jones Industrial Average", "NASDAQ 100", "PHLX Semiconductor", "All Indices"]
period = "2y" # @param ["6mo","1y","ytd","2y"]

# Map the display name chosen in the form to the index code(s) given to
# rank(); several codes are joined with '+'.
code_from_name = {
    'S&P 500': 'SPX',
    'Dow Jones Industrial Average': 'DJIA',
    'NASDAQ 100': 'NDX',
    'PHLX Semiconductor': 'SOX',
    'All Indices': 'SPX+DJIA+NDX+SOX',
}

# Rank with rank()'s default tickers_getter and ref_ticker, then show the
# stock table followed by the industry table.
rank_stock, rank_indust = rank(code_from_name[source], period)
display(rank_stock)
display(rank_indust)



***
Your "SPX+DJIA+NDX+SOX_stocks_2y_20240818.csv" is in the "out" folder.
Your "SPX+DJIA+NDX+SOX_industries_2y_20240818.csv" is in the "out" folder.
***



Unnamed: 0,Ticker,Sector,Industry,Relative Strength,1 Month Ago,3 Months Ago,6 Months Ago,Percentile,1 Month Ago.1,3 Months Ago.1,6 Months Ago.1,Rank
218,NVDA,Technology,Semiconductors,170.78,185.60,164.57,177.80,99,99,99,99,1
357,VST,Utilities,Utilities - Independent Power Producers,148.00,166.18,242.28,155.40,99,99,99,99,2
125,COHR,Technology,Scientific & Technical Instruments,147.09,142.89,118.70,136.01,99,99,92,97,3
118,GDDY,Technology,Software - Infrastructure,141.03,130.40,137.25,120.04,99,98,97,93,4
135,IRM,Real Estate,REIT - Specialty,139.35,128.54,113.39,110.43,99,97,87,80,5
...,...,...,...,...,...,...,...,...,...,...,...,...
50,DXCM,Healthcare,Medical Devices,55.70,80.71,100.09,89.08,0,14,59,32,517
361,ALB,Basic Materials,Specialty Chemicals,52.57,59.86,77.15,65.17,0,0,5,1,518
324,INTC,Technology,Semiconductors,51.94,78.17,73.18,111.19,0,9,3,81,519
330,WBA,Healthcare,Pharmaceutical Retailers,46.59,46.40,62.27,74.66,0,0,0,5,520


Unnamed: 0,Industry,Sector,Relative Strength,1 Month Ago,3 Months Ago,6 Months Ago,Tickers,Percentile,1 Month Ago.1,3 Months Ago.1,6 Months Ago.1,Rank
80,Utilities - Independent Power Producers,Utilities,141.47,147.73,204.77,138.73,"VST,NRG",99,98,99,98,1
79,Medical Care Facilities,Healthcare,119.70,104.44,105.35,107.39,"UHS,HCA,DVA",98,86,81,83,2
10,Computer Hardware,Technology,115.06,129.34,141.18,154.74,"NTAP,ANET,SMCI,STX,HPQ,WDC",97,97,98,99,3
74,Oil & Gas Midstream,Energy,113.54,111.24,109.80,96.44,"TRGP,OKE,WMB,KMI",96,95,92,48,4
53,Tobacco,Consumer Defensive,111.62,101.12,99.73,83.62,"PM,MO",95,76,62,9,5
...,...,...,...,...,...,...,...,...,...,...,...,...
61,Oil & Gas Equipment & Services,Energy,82.01,82.38,88.03,81.34,"BKR,SLB,HAL",4,6,8,6,87
35,Steel,Basic Materials,80.29,87.71,97.68,106.76,"STLD,NUE",3,17,55,78,88
4,Auto Parts,Consumer Cyclical,78.16,76.06,85.02,79.06,"GPC,BWA,LKQ,APTV",2,0,4,4,89
69,Resorts & Casinos,Consumer Cyclical,75.26,76.91,77.85,85.72,"MGM,CZR,LVS,WYNN",1,2,0,18,90


In [41]:
# @title Top Percentile Stocks
min_percentile = 90 # @param {"type":"slider","min":1,"max":99,"step":1}
# Keep the rows of rank_stock whose percentile column (named by
# ibd.TITLE_PERCENTILE) is at least min_percentile.
top_stocks = rank_stock[rank_stock[ibd.TITLE_PERCENTILE] >= min_percentile]
num_rows, _ = top_stocks.shape
print(f'\nnumber of filtered tickers: {num_rows}')
# Print the remaining tickers as a plain Python list.
top_stock_list = list(top_stocks["Ticker"])
print(top_stock_list)


number of filtered tickers: 52
['NVDA', 'VST', 'COHR', 'GDDY', 'IRM', 'NRG', 'HWM', 'FICO', 'AXON', 'TRGP', 'AVGO', 'UHS', 'TSM', 'KKR', 'MMM', 'MPWR', 'NTAP', 'GE', 'ANET', 'K', 'ISRG', 'MHK', 'LLY', 'PGR', 'CFG', 'KLAC', 'META', 'NEM', 'IP', 'TYL', 'RCL', 'REGN', 'SMCI', 'GRMN', 'GS', 'VTR', 'FITB', 'TT', 'CEG', 'HCA', 'DVA', 'COST', 'RTX', 'DFS', 'AXP', 'MSI', 'EFX', 'NFLX', 'KEY', 'DECK', 'AFL', 'DHI']


In [42]:
# @title Filtered Stocks with Increasing RS > 100
# Keep only the stocks whose Relative Strength is above 100 and higher than
# each earlier reading (see filter_increasing_relative_strength).
filtered_rank_stock = filter_increasing_relative_strength(rank_stock)
display(filtered_rank_stock)

Unnamed: 0,Ticker,Sector,Industry,Relative Strength,1 Month Ago,3 Months Ago,6 Months Ago,Percentile,1 Month Ago.1,3 Months Ago.1,6 Months Ago.1,Rank
135,IRM,Real Estate,REIT - Specialty,139.35,128.54,113.39,110.43,99,97,87,80,5
411,TRGP,Energy,Oil & Gas Midstream,130.65,127.36,117.68,104.09,98,96,91,68,10
486,CFG,Financial Services,Banks - Regional,118.62,116.09,112.94,93.67,95,92,85,45,25
34,IP,Consumer Cyclical,Packaging & Containers,117.03,116.36,112.31,90.0,94,93,85,36,29
356,TYL,Technology,Software - Application,116.89,111.0,105.8,97.6,94,88,73,52,30
454,VTR,Real Estate,REIT - Healthcare Facilities,115.42,102.44,98.57,82.19,93,72,54,14,36
200,MSI,Technology,Communication Equipment,113.88,106.69,105.65,96.71,91,80,73,50,46
97,PM,Consumer Defensive,Tobacco,112.76,99.21,98.44,83.54,89,64,53,16,57
270,TTD,Technology,Software - Application,112.51,111.73,111.17,110.86,88,89,82,81,60
215,WELL,Real Estate,REIT - Healthcare Facilities,111.55,106.17,105.08,98.18,87,79,69,54,64


### RS Rating and Ranking for Taiwan Stocks

In [43]:
from vistock import tw

# Form inputs: which Taiwan market(s) to rank, and the history range.
source = "上市+上櫃" #@param ["上市", "上櫃", "上市+上櫃", "興櫃", "全部"]
period = "2y" # @param ["6mo","1y","ytd","2y"]

# Map the display name chosen in the form to the market code(s) given to
# rank(); several codes are joined with '+'.
code_from_name = {
    '上市': 'TWSE',
    '上櫃': 'TPEX',
    '上市+上櫃': 'TWSE+TPEX',
    '興櫃': 'ESB',
    '全部': 'TWSE+TPEX+ESB'
}

# Unlike the US cell, tickers come from tw.get_tickers and '^TWII' is used as
# the reference ticker instead of rank()'s defaults.
tw_stocks, tw_industries = rank(code_from_name[source], period,
     tickers_getter=tw.get_tickers, ref_ticker='^TWII')
display(tw_stocks)
display(tw_industries)



***
Your "TWSE+TPEX_stocks_2y_20240818.csv" is in the "out" folder.
Your "TWSE+TPEX_industries_2y_20240818.csv" is in the "out" folder.
***



Unnamed: 0,Ticker,Sector,Industry,Relative Strength,1 Month Ago,3 Months Ago,6 Months Ago,Percentile,1 Month Ago.1,3 Months Ago.1,6 Months Ago.1,Rank
1663,6144,Communication Services,Entertainment,440.94,353.05,127.34,84.52,99,99,92,20,1
306,8374,Industrials,Industrial Distribution,433.57,297.47,103.01,82.73,99,99,77,16,2
703,1799,Healthcare,Drug Manufacturers—Specialty & Generic,404.83,419.46,268.75,194.15,99,99,99,97,3
1032,6640,Technology,Semiconductors,383.26,319.59,245.60,196.13,99,99,99,98,4
1453,4562,Industrials,Specialty Industrial Machinery,343.37,285.12,162.65,88.33,99,99,97,35,5
...,...,...,...,...,...,...,...,...,...,...,...,...
1462,2734,Consumer Cyclical,Travel Services,54.35,62.58,78.54,86.01,0,1,27,26,2003
214,6929,Consumer Defensive,Packaged Foods,53.74,53.43,46.66,70.95,0,0,0,1,2004
1141,6291,Technology,Semiconductors,53.48,55.53,63.71,109.62,0,0,2,77,2005
488,2736,Consumer Cyclical,Lodging,52.88,64.24,78.39,82.41,0,2,26,15,2006


Unnamed: 0,Industry,Sector,Relative Strength,1 Month Ago,3 Months Ago,6 Months Ago,Tickers,Percentile,1 Month Ago.1,3 Months Ago.1,6 Months Ago.1,Rank
70,Industrial Distribution,Industrials,170.37,137.58,88.85,88.68,837491160831142373,98,96,54,29,1
22,Real Estate - Development,Real Estate,164.67,128.02,106.24,100.47,2524520625343056,97,94,91,72,2
63,Real Estate—Development,Real Estate,139.06,129.31,122.39,108.54,"5508,3188,5455,2537,1436,2718,3489,4907,6186,6...",96,95,99,86,3
79,Utilities—Renewable,Utilities,133.00,148.42,101.93,112.86,686968738087,95,97,86,93,4
75,Real Estate—Diversified,Real Estate,132.96,123.63,121.27,114.98,1438621999462545252055122547,94,93,98,95,5
...,...,...,...,...,...,...,...,...,...,...,...,...
80,Apparel Retail,Consumer Cyclical,73.53,74.16,67.86,80.47,14172911,3,5,0,3,97
100,Pharmaceutical Retailers,Healthcare,70.34,66.86,70.39,86.56,417341756469,2,0,1,18,98
58,Auto Manufacturers,Consumer Cyclical,69.90,74.81,78.17,95.30,15992206220422012227,1,7,13,61,99
46,Travel Services,Consumer Cyclical,69.08,83.46,96.52,87.53,2745273157066242271927432734,0,30,75,22,100


In [44]:
# @title Top Percentile Taiwan Stocks
min_percentile = 95 # @param {"type":"slider","min":1,"max":99,"step":1}

# Keep the rows of tw_stocks whose percentile column (named by
# ibd.TITLE_PERCENTILE) is at least min_percentile.
top_stocks = tw_stocks[tw_stocks[ibd.TITLE_PERCENTILE] >= min_percentile]
num_rows, _ = top_stocks.shape
print(f'\nnumber of filtered tickers: {num_rows}')
top_stock_list = list(top_stocks["Ticker"])
# Translate each ticker code with tw.stock_name before printing.
top_stock_list = [tw.stock_name(ticker) for ticker in top_stock_list]
print(top_stock_list)


number of filtered tickers: 101
['得利影', '羅昇', '易威', '均華', '穎漢', '昆盈', '光聖', '海悅', '福大', '皇昌', '福裕', '京城', '所羅門', '新復興', '欣巴巴', '慧友', '康全電訊', '太普高', '晶彩科', '弘塑', '擎亞', '均豪', '世紀', '弘憶股', '慶騰', '新門', '錦明', '彬台', '永信建', '訊舟', '福懋油', '翔耀', '順藥', '系微', '志聖', '高鋒', '天品', '鏵友益', '花王', '東捷', '鑫科', '天揚', '昇陽半導體', '鑫龍騰', '旺矽', '昇益', '聯上發', '三地開發', '安國', '藝舍-KY', '晟銘電', '天方能源', '精湛', '聯鈞', '德晉', '合騏', '訊聯基因', '及成', '京晨科', '華友聯', '晶悅', '和椿', '全譜', '精材', '華城', '愛山林', '鈊象', '喬福', '波力-KY', '鈺邦', '惠特', '雲豹能源', '萬潤', '迎廣', '泰金-KY', '勝昱', '峰源-KY', '森寶', '盟立', '友威科', '富宇', '藥華藥', '新潤', '理銘', '德律', '東科-KY', '達麗', '益登', '宏碩系統', '華義', '坤悅', '大城地產', '時碩工業', '富旺', '健椿', '岱稜', '信紘科', '亞光', '華景電', '雷科', '一詮']


In [45]:
# @title Filtered Taiwan Stocks with Increasing RS > 100

# Stocks: filter first, then rewrite the 'Ticker' column in place so it
# holds stock names instead of codes.
filtered_tw_stocks = filter_increasing_relative_strength(tw_stocks)
update_tickers_with_names(filtered_tw_stocks, 'Ticker')
display(filtered_tw_stocks)

# Industries: same steps; 'Tickers' holds several comma-separated codes per
# row, which update_tickers_with_names splits and re-joins.
filtered_tw_industries = filter_increasing_relative_strength(tw_industries)
update_tickers_with_names(filtered_tw_industries, 'Tickers')
display(filtered_tw_industries)

Unnamed: 0,Ticker,Sector,Industry,Relative Strength,1 Month Ago,3 Months Ago,6 Months Ago,Percentile,1 Month Ago.1,3 Months Ago.1,6 Months Ago.1,Rank
1663,得利影,Communication Services,Entertainment,440.94,353.05,127.34,84.52,99,99,92,20,1
306,羅昇,Industrials,Industrial Distribution,433.57,297.47,103.01,82.73,99,99,77,16,2
1032,均華,Technology,Semiconductors,383.26,319.59,245.60,196.13,99,99,99,98,4
1453,穎漢,Industrials,Specialty Industrial Machinery,343.37,285.12,162.65,88.33,99,99,97,35,5
640,海悅,Real Estate,Real Estate Services,298.87,258.33,191.38,120.48,99,99,98,85,8
...,...,...,...,...,...,...,...,...,...,...,...,...
365,元大MSCI台灣,Unknown,Unknown,103.97,103.83,102.94,99.15,78,78,77,62,427
1991,泰豐,Consumer Cyclical,Auto Parts,101.73,93.35,86.41,85.82,77,64,50,25,462
1565,和大,Consumer Cyclical,Auto Parts,101.51,89.16,74.19,71.46,76,57,15,2,466
648,臺企銀,Financial Services,Banks—Regional,101.08,100.25,100.05,88.57,76,74,74,37,472


Unnamed: 0,Industry,Sector,Relative Strength,1 Month Ago,3 Months Ago,6 Months Ago,Tickers,Percentile,1 Month Ago.1,3 Months Ago.1,6 Months Ago.1,Rank
70,Industrial Distribution,Industrials,170.37,137.58,88.85,88.68,"羅昇,明輝-DR,好德,震旦行",98,96,54,29,1
22,Real Estate - Development,Real Estate,164.67,128.02,106.24,100.47,"京城,坤悅,宏盛,富華新",97,94,91,72,2
63,Real Estate—Development,Real Estate,139.06,129.31,122.39,108.54,"永信建,鑫龍騰,昇益,聯上發,華友聯,晶悅,森寶,富宇,新潤,理銘,達麗,大城地產,櫻花建,...",96,95,99,86,3
75,Real Estate—Diversified,Real Estate,132.96,123.63,121.27,114.98,"三地開發,富旺,三發地產,皇翔,冠德,力麒,日勝生",94,93,98,95,5
24,Real Estate Services,Real Estate,116.21,113.18,111.51,104.2,"海悅,愛山林,綠意,名軒,台火,鉅陞,上曜,富裔,華建,全坤建,亞昕,宏璟,皇普,昇陽,潤隆...",90,90,96,80,9


### Remove files in GitHub Repository

In [46]:
# @title CSV Deleter
import re
import ipywidgets as widgets

# List the files currently in the GitHub directory. The requests cache is
# disabled for this call so the listing is not answered from a cached
# response.
with requests_cache.disabled():
    all_filenames = github.list_filenames()

# Function to extract unique dates from filenames
def extract_dates(filenames):
    """Return the distinct 8-digit date strings found in `filenames`.

    Only the first run of eight digits in each name is used, and names
    without one are skipped. The result is sorted in descending string
    order, which puts the newest date first for YYYYMMDD stamps.
    """
    hits = (re.search(r'\d{8}', name) for name in filenames)
    return sorted({hit.group(0) for hit in hits if hit}, reverse=True)

# Delete one file from the GitHub directory and forget it locally
def remove_file(filename):
    """Remove `filename` from GitHub and from the `all_filenames` list.

    The requests cache is disabled for the GitHub call so that the calls
    made by github.remove_file() are not answered from cached responses.
    """
    print(f"Removing file: {filename}")
    with requests_cache.disabled():
        github.remove_file(filename)
    # NOTE(review): github.remove_file() does not report success, so the name
    # is dropped from the local list even when the deletion failed.
    all_filenames.remove(filename)

#-------------------------------------------------------------------------------

# Update file selector options based on selected date
def update_file_selector(change):
    """Show the files that belong to the newly selected date.

    Used as an observer of the date dropdown and also called directly with
    a `{'new': date}` dict. The file selector is filled with every name in
    `all_filenames` that contains the selected date, and it is resized to
    fit them.

    Parameters:
    - change: Mapping whose 'new' entry is the selected date string, or
      None when the dropdown has no selection.
    """
    def selector_width(filenames):
        max_filename_length = max(len(fn) for fn in filenames)
        return f'{max_filename_length * 10}px'  # 10px width per character

    selected_date = change['new']
    if selected_date is None:
        # No date is selected (e.g. the dropdown has no options), so there
        # is nothing to match; `None in fn` would raise a TypeError.
        lst_fns = []
    else:
        lst_fns = [fn for fn in all_filenames if selected_date in fn]
    file_selector_widget.options = lst_fns
    file_selector_widget.rows = len(file_selector_widget.options)
    if lst_fns:
        # Only resize when there is something to measure: max() of an empty
        # sequence raises a ValueError.
        file_selector_widget.layout = widgets.Layout(
            width=selector_width(lst_fns))

# Function to delete selected files
def delete_files(button):
    """Button callback: delete every file selected in the file selector.

    `button` is the clicked widget and is not used. After the deletions the
    date and file selectors are refreshed.
    """
    for chosen in file_selector_widget.value:
        remove_file(chosen)
    # Bring both selectors in line with what is left.
    update_widgets()

# Update widgets to reflect current state
def update_widgets():
    """Refresh the date dropdown and the file selector from `all_filenames`.

    The currently selected date is kept when it still has files. Otherwise
    the date that now sits at the same position in the list is selected
    (or the last one when that position no longer exists).

    When no dated file is left, the file selector is emptied and the date
    dropdown is disabled. Its options are deliberately left untouched:
    clearing them would change the dropdown value and trigger its observer
    with no date selected.
    """
    dates = extract_dates(all_filenames)
    if not dates:
        # Handle the empty case first; indexing `dates` below would raise
        # an IndexError.
        file_selector_widget.options = []
        date_selector_widget.disabled = True
        return

    # Refresh the date selector
    selected_date = date_selector_widget.value
    if selected_date not in dates:
        previous_options = list(date_selector_widget.options)
        if selected_date in previous_options:
            # Stay at the same position, clamped to the shorter list.
            i = min(previous_options.index(selected_date), len(dates) - 1)
            selected_date = dates[i]
        else:
            # Nothing was selected before; fall back to the first date.
            selected_date = dates[0]
    date_selector_widget.options = dates
    date_selector_widget.value = selected_date
    update_file_selector({'new': date_selector_widget.value})

#-------------------------------------------------------------------------------

# Create a widget for selecting dates
def create_date_selector(dates):
    """Create the dropdown used to pick a date.

    Parameters:
    - dates: Date strings offered as options; the first one is preselected.
      An empty list yields a dropdown without options and without a
      selection instead of raising an IndexError.

    Returns:
    - The `widgets.Dropdown` instance.
    """
    return widgets.Dropdown(
        options=dates,
        value=dates[0] if dates else None,
        description='Date:',
        disabled=False
    )

# Create a widget for selecting files
def create_file_selector(filenames):
    """Create the multi-select list used to pick files, initially empty.

    NOTE(review): `filenames` is not used here; the options are filled in
    later by update_file_selector().
    """

    return widgets.SelectMultiple(
        options=[],
        value=[],
        description='Files',
        disabled=False,
    )

# Create widgets: a date dropdown, a file multi-select and a delete button
# whose click handler is delete_files().
dates = extract_dates(all_filenames)
date_selector_widget = create_date_selector(dates)
file_selector_widget = create_file_selector(all_filenames)
delete_button = widgets.Button(description="Delete Selected Files")
delete_button.on_click(delete_files)

# Fill the file selector for the date preselected by create_date_selector()
update_widgets()

# Set up the observer to update file selector when date is changed.
# It is registered after the initial update_widgets() call, so that first
# refresh is driven only by the explicit call above.
date_selector_widget.observe(update_file_selector, names='value')

# Display widgets
display(date_selector_widget)
display(file_selector_widget)
display(delete_button)


Dropdown(description='Date:', options=('20240818', '20240814', '20240813', '20240812'), value='20240818')

SelectMultiple(description='Files', layout=Layout(width='430px'), options=('SPX+DJIA+NDX+SOX_industries_2y_202…

Button(description='Delete Selected Files', style=ButtonStyle())