<a href="https://colab.research.google.com/github/yutao-data/Financial-Calculator/blob/main/Invest_Industry_Daily_PE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [35]:
!pip install tushare
!pip install tabulate
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [19]:
import pandas as pd
import matplotlib.pyplot as plt
import tushare as ts
import os
from datetime import datetime
import time

# Initialize the Tushare API
api_key_path = '/content/drive/My Drive/tushare_data/api_key.txt'
with open(api_key_path, 'r') as file:
    api_key = file.readline().strip()
pro = ts.pro_api(api_key)

drive_path = '/content/drive/My Drive/tushare_data/industrial_pe'
if not os.path.exists(drive_path):
    os.makedirs(drive_path)

In [20]:
def get_sw_daily_data(trade_date):
    df = pro.sw_daily(trade_date=trade_date, fields='ts_code,trade_date,name,open,close,vol,pe,pb')
    return df

def store_data(df, file_name):
    file_path = os.path.join(drive_path, file_name)
    if os.path.exists(file_path):
        existing_df = pd.read_csv(file_path)
        df = pd.concat([existing_df, df]).drop_duplicates(subset=['ts_code', 'trade_date'])
    df.to_csv(file_path, index=False)
    return file_path

def load_data(file_name):
    file_path = os.path.join(drive_path, file_name)
    if os.path.exists(file_path):
        df = pd.read_csv(file_path)
    else:
        df = pd.DataFrame()
    return df

# 获取当天数据
trade_date = datetime.now().strftime('%Y%m%d')
df = get_sw_daily_data(trade_date)

# 存储数据到Google Drive
file_name = 'sw_daily_data.csv'
file_path = store_data(df, file_name)
print(f'Data stored at {file_path}')

Data stored at /content/drive/My Drive/tushare_data/industrial_pe/sw_daily_data.csv


In [21]:
def filter_top_volume_industries(df, top_percent=0.06):
    df = df.sort_values(by='vol', ascending=False)
    top_n = int(len(df) * top_percent)
    top_industries = df.head(top_n)
    return top_industries

# 筛选交易量在前6%的行业
top_industries = filter_top_volume_industries(df)

In [None]:
def get_historical_data(ts_code):
    historical_file = os.path.join(drive_path, f'{ts_code}_historical.csv')
    if os.path.exists(historical_file):
        historical_df = pd.read_csv(historical_file)
    else:
        historical_df = pd.DataFrame()

    end_date = datetime.now().strftime('%Y%m%d')
    start_date = (datetime.now() - pd.DateOffset(years=5)).strftime('%Y%m%d')
    df_list = []

    for year in range(10):
        for month in range(1, 13):
            date = f"{int(start_date[:4]) + year}{month:02}01"
            if date not in historical_df.get('trade_date', pd.Series()).values:
                df = pro.sw_daily(ts_code=ts_code, trade_date=date, fields='ts_code,trade_date,pe')
                if not df.empty:
                    df_list.append(df)

    if df_list:
        new_historical_df = pd.concat(df_list)
        historical_df = pd.concat([historical_df, new_historical_df]).drop_duplicates(subset=['ts_code', 'trade_date'])
        historical_df.to_csv(historical_file, index=False)

    return historical_df

# 获取所有交易量前6%的行业的历史数据并存储
historical_data = {}
for ts_code in top_industries['ts_code'].unique():
    historical_data[ts_code] = get_historical_data(ts_code)

In [37]:
from tabulate import tabulate

def analyze_data(current_df, historical_data):
    analysis_df = current_df[['ts_code', 'name', 'pe']].copy()
    analysis_df['historical_avg_pe'] = analysis_df['ts_code'].apply(lambda x: historical_data[x]['pe'].mean() if x in historical_data else None)
    analysis_df = analysis_df.dropna(subset=['historical_avg_pe'])

    analysis_df['pe_ratio'] = (analysis_df['pe'] / analysis_df['historical_avg_pe']) - 1

    analysis_df = analysis_df.sort_values(by='pe_ratio')
    lowest_pe_df = analysis_df.head(5)

    final_df = lowest_pe_df[['ts_code', 'name', 'pe', 'historical_avg_pe', 'pe_ratio']].copy()
    final_df['pe_ratio'] = final_df['pe_ratio'] * 100
    final_df.rename(columns={
        'ts_code': '行业代码',
        'name': '板块名称',
        'pe': '当前PE值',
        'historical_avg_pe': '历史平均PE值',
        'pe_ratio': '当前PE低于历史PE百分比'
    }, inplace=True)
    final_df['时间'] = datetime.now().strftime('%Y-%m-%d')

    return final_df

result_df = analyze_data(top_industries, historical_data)

print("相对于历史PE最低的5个主要行业:")
print(tabulate(result_df, headers='keys', tablefmt='grid', showindex=False))

相对于历史PE最低的5个主要行业:
+------------+--------------+------------+----------------+--------------------------+------------+
| 行业代码   | 板块名称     |   当前PE值 |   历史平均PE值 |   当前PE低于历史PE百分比 | 时间       |
| 801730.SI  | 电力设备     |      20.41 |        33.9479 |                 -39.8785 | 2024-07-29 |
+------------+--------------+------------+----------------+--------------------------+------------+
| 801005.SI  | 申万创业     |      26.51 |        41.3755 |                 -35.9283 | 2024-07-29 |
+------------+--------------+------------+----------------+--------------------------+------------+
| 801832.SI  | 中市净率指数 |      17.13 |        24.6766 |                 -30.5819 | 2024-07-29 |
+------------+--------------+------------+----------------+--------------------------+------------+
| 801260.SI  | 申万消费     |      18.28 |        25.3142 |                 -27.7876 | 2024-07-29 |
+------------+--------------+------------+----------------+--------------------------+------------+
| 801002.SI  | 申万中小     |  