In [None]:
from bs4 import BeautifulSoup
import pandas as pd
import requests

# Load the list of stocks to scrape.
# The 'no' column (stock code) is forced to text: with an all-numeric column
# pandas would infer an integer dtype, which strips leading zeros and breaks
# the string operations applied to the codes further down the notebook.
stock_targets = pd.read_csv('stock_targets.csv', dtype={'no': str})
stock_targets

In [None]:
# Drop stock codes that begin with '0': many of them are ETFs, which
# stockfeel has no records for.

starts_with_zero = stock_targets['no'].str.startswith('0')
stock_targets = stock_targets[~starts_with_zero]
stock_targets

In [None]:
# Scrape business-scope and competitor information for every target stock
# from stockfeel, then save the collected rows to ./stock_infos.csv.
# A stock whose page cannot be fetched or parsed is reported and skipped, so a
# single bad page no longer throws away everything collected before it.

print("Searching stocks info...")
stock_info_records = []
for _, row in stock_targets.iterrows():
    stock_no, stock_name = row['no'], row['name']
    # f-string instead of '+' so a non-string code cannot raise TypeError here.
    url = f'https://www.stockfeel.com.tw/financial/?stock={stock_no}'
    try:
        # requests has no default timeout; without one a stalled connection
        # would hang the whole run.
        response = requests.get(url, timeout=30)
    except requests.RequestException as error:
        print(f"Failed to fetch data for {stock_no} {stock_name}: {error}")
        continue
    if response.status_code != 200:
        print(f"Failed to fetch data for {stock_no} {stock_name}")
        continue
    print(f"Fetching data for {stock_no} {stock_name}")

    soup = BeautifulSoup(response.text, 'html.parser')
    try:
        # Look the content container up once instead of once per field.
        content = soup.select('div.sin-content-original')[0]
        business_scope_title = content.select('h3')[0].text
        business_scope_content = content.select('p')[0].text
        competitor_content = content.select('p')[2].text
        competitor_table = content.select('table.table-content')[0]
    except IndexError:
        # The page does not contain the elements indexed above; skip it.
        print(f"Failed to parse data for {stock_no} {stock_name}")
        continue

    # Cells are read as alternating (name, code) pairs -- assumed table
    # layout, TODO confirm against the site. The mapping is keyed by code.
    cells = [cell.text for cell in competitor_table.find_all('td')]
    competitor_dict = dict(zip(cells[1::2], cells[::2]))

    stock_info_records.append({
        'stock_no': stock_no,
        'stock_name': stock_name,
        'business_scope': business_scope_title + '？' + business_scope_content,
        'competitor': competitor_content + str(competitor_dict),
    })

# Explicit columns keep the CSV header and later column lookups valid even
# when no stock could be scraped.
stock_infos = pd.DataFrame(
    stock_info_records,
    columns=['stock_no', 'stock_name', 'business_scope', 'competitor'],
)
stock_infos.to_csv("./stock_infos.csv", index=False)

In [None]:
# Check whether any target stock is absent from the scraped results.

target_codes = set(stock_targets['no'])
scraped_codes = set(stock_infos['stock_no'])
unscraped_codes = target_codes - scraped_codes
missing_stocks = stock_targets.loc[stock_targets['no'].isin(unscraped_codes)]
missing_stocks

In [None]:
# Scrape the stocks that are still missing and save them to ./stock_infos2.csv.
# A stock whose page cannot be fetched or parsed is reported and skipped, so a
# single bad page no longer throws away everything collected before it.

missing_stock_infos = []
for _, row in missing_stocks.iterrows():
    stock_no, stock_name = row['no'], row['name']
    print(stock_no, stock_name)

    # f-string instead of '+' so a non-string code cannot raise TypeError here.
    url = f'https://www.stockfeel.com.tw/financial/?stock={stock_no}'
    try:
        # requests has no default timeout; without one a stalled connection
        # would hang the whole run.
        response = requests.get(url, timeout=30)
    except requests.RequestException as error:
        print(f"Failed to fetch data for {stock_no} {stock_name}: {error}")
        continue
    if response.status_code != 200:
        print(f"Failed to fetch data for {stock_no} {stock_name}")
        continue
    print(f"Fetching data for {stock_no} {stock_name}")

    soup = BeautifulSoup(response.text, 'html.parser')
    try:
        # Look the content container up once instead of once per field.
        content = soup.select('div.sin-content-original')[0]
        business_scope_title = content.select('h3')[0].text
        business_scope_content = content.select('p')[0].text
        competitor_content = content.select('p')[2].text
        competitor_table = content.select('table.table-content')[0]
    except IndexError:
        # The page does not contain the elements indexed above; skip it.
        print(f"Failed to parse data for {stock_no} {stock_name}")
        continue

    # Cells are read as alternating (name, code) pairs -- assumed table
    # layout, TODO confirm against the site. The mapping is keyed by code.
    cells = [cell.text for cell in competitor_table.find_all('td')]
    competitor_dict = dict(zip(cells[1::2], cells[::2]))

    missing_stock_infos.append({
        'stock_no': stock_no,
        'stock_name': stock_name,
        'business_scope': business_scope_title + '？' + business_scope_content,
        'competitor': competitor_content + str(competitor_dict),
    })

# Explicit columns keep the CSV header valid even when nothing was scraped.
missing_stock_infos_df = pd.DataFrame(
    missing_stock_infos,
    columns=['stock_no', 'stock_name', 'business_scope', 'competitor'],
)
missing_stock_infos_df.to_csv("./stock_infos2.csv", index=False)