### 將Excel檔按不同主題/維度去分割，並自動另存輸出

The following content was first written in March 2022.<br>

 - 說明:
 1. 這邊所輸入的維度標記完成之檔案是針對罷工站台或是站台上的維度標記（分析實驗室）之輸出格式做撰寫
 2. 維度部分只要帶有1、0判斷的欄位都可做使用

#### 前置作業

In [None]:
#設定環境
#-*-coding:UTF-8-*- 

import os, time, glob, socket

# Report when and where the notebook is running.
# Resolving the host name to an address raises socket.gaierror when the name
# has no DNS/hosts entry, so fall back to a placeholder instead of aborting
# the first cell of the notebook.
host_name = socket.gethostname()
try:
    host_ip = socket.gethostbyname(host_name)
except socket.gaierror:
    host_ip = "N/A"

print("【日期時間】{}".format(time.strftime("%Y/%m/%d %H:%M:%S")))
print("【工作目錄】{}".format(os.getcwd()))
print("【主機名稱】{} ({})".format(host_name, host_ip))

# Set the notebook autosave interval (IPython magic; value in seconds).
%autosave 60

In [None]:
# Install tqdm, then import pandas and the progress-bar / sleep helpers used by the cells below.

!pip install tqdm
import pandas as pd
from tqdm.notebook import trange, tqdm  # notebook progress bars
from time import sleep

In [None]:
# Load the dimension-tagged Excel export into `df`.
# NOTE: replace the path below with the location of your own file.

import warnings

source_xlsx = r"C:\Users\mingyanlin\OneDrive\文件\01. Task\01. eLand\03. Analysis Reports\【Fubon Securities 富邦證券】季報\2022 Q2\DimTag_Result_22Q2fbs季報.xlsx"

# record=True collects any warnings raised while reading into a list that is
# discarded here, rather than printing them in the notebook output.
with warnings.catch_warnings(record=True):
    warnings.simplefilter("always")
    df = pd.read_excel(source_xlsx, engine="openpyxl")

In [None]:
# Preview the first five rows to check that the workbook loaded as expected.

df.head(n=5)

In [None]:
# Output folder for the batch exports below; replace it with the folder the loops should write to.
# The export cells build file names by appending directly to this string, so keep the trailing "/".

path = "C:/Users/mingyanlin/OneDrive/文件/01. Task/01. eLand/03. Analysis Reports/【Fubon Securities 富邦證券】季報/2022 Q2/Dimtag/"

#### 1、按「主題」匯出

In [None]:
# Build the list of monitored topics (brands) to export.
# Put the topic names between the quotes, separated by tab characters (e.g. a
# row of cells pasted from a spreadsheet). The separators are written as "\t"
# here so they stay visible and survive editors that turn tabs into spaces;
# pasted raw tabs are split the same way.

brands = "富邦證券\t國泰證券\t永豐金證券\t元大證券\t凱基證券\t日盛證券"
# Trim stray whitespace and drop empty entries (e.g. from a trailing or doubled
# tab) so that no blank topic reaches the export loops.
brands = [name.strip() for name in brands.split("\t") if name.strip()]
print(brands)

In [None]:
# Export one workbook per topic: rows whose "監測主題" equals the topic are
# written to "<path><topic>.xlsx" without the index column.

for brand in tqdm(brands):
    df_by_brand = df.loc[df["監測主題"] == brand]
    df_by_brand.to_excel(f"{path}{brand}.xlsx", index=False)
    sleep(0.5)

In [None]:
# Alternative per-topic export for when the plain export fails with a
# URL-related error: write through the xlsxwriter engine with
# strings_to_urls=False; strings_to_numbers=True is kept as configured.

for brand in tqdm(brands):
    df_by_brand = df.loc[df["監測主題"] == brand]
    workbook_options = {'strings_to_numbers': True, 'strings_to_urls': False}
    with pd.ExcelWriter(
        f"{path}{brand}.xlsx",
        engine='xlsxwriter',
        engine_kwargs={'options': workbook_options},
    ) as writer:
        df_by_brand.to_excel(writer, index=False)
    sleep(0.5)

#### 2、按「維度」命中匯出

In [None]:
# Build the list of dimension columns to export.
# Put the dimension names between the quotes, separated by tab characters
# (e.g. a row of cells pasted from a spreadsheet), then split on the tab
# character. The separators are written as "\t" here so they stay visible and
# survive editors that turn tabs into spaces; pasted raw tabs work the same.

dms = "線上開戶\t台股\t定期定額\t海外股票\t基金\t信用\t期權\t其他商品"
# Trim stray whitespace and drop empty entries (e.g. from a trailing or doubled
# tab) so that no blank column name reaches the export loops.
dms = [name.strip() for name in dms.split("\t") if name.strip()]
print(dms)

In [None]:
# Export one workbook per dimension: rows where the dimension column equals 1
# are written to "<path>【<dimension>】維度標記.xlsx" without the index column.

for dim in tqdm(dms):
    df_by_dms = df.loc[df[dim] == 1]
    df_by_dms.to_excel(f"{path}【{dim}】維度標記.xlsx", index=False)
    sleep(0.5)

In [None]:
# Alternative per-dimension export for when the plain export fails with a
# URL-related error.
# The "options" dict (strings_to_numbers / strings_to_urls) is an XlsxWriter
# Workbook argument, so the writer must use the xlsxwriter engine; openpyxl's
# Workbook has no such argument. This matches the per-topic fallback export.

for i in tqdm(dms):
    newpath = path + "【" + str(i) + "】" + '維度標記.xlsx'
    df_by_dms = df[df[i] == 1]
    with pd.ExcelWriter(newpath, engine='xlsxwriter',
                        engine_kwargs={"options": {'strings_to_numbers': True, 'strings_to_urls': False}}) as writer:
        df_by_dms.to_excel(writer, index=False)
    sleep(0.5)

In [None]:
# Total every dimension column per topic and print the results.
# DimDic["<topic>-<dimension>"] is the sum of that dimension's column over the
# rows of that topic (the number of hits when the column holds 1/0 flags).

DimDic = {}

for i in tqdm(brands):
    # Filter once per topic: the subset does not depend on the dimension, so
    # there is no need to rebuild it inside the inner loop.
    brand_rows = df[df.監測主題 == i]
    for k in tqdm(dms):
        DimDic[i+"-"+k] = brand_rows[k].sum()

for m in DimDic.items():
    print(m)

In [None]:
# Total every dimension column per topic, keyed dimension-first, and print the
# results. DimDic["<dimension>-<topic>"] is the sum of that dimension's column
# over the rows of that topic.

DimDic = {}

# Split the frame by topic once instead of re-filtering it for every
# dimension/topic pair.
rows_by_brand = {k: df[df.監測主題 == k] for k in brands}

for i in tqdm(dms):
    for k in tqdm(brands):
        DimDic[i+"-"+k] = rows_by_brand[k][i].sum()

for m in DimDic.items():
    print(m)

In [None]:
# Per-topic sum of the "台股" column divided by 81, printed as (topic, value) pairs.
# NOTE(review): the divisor 81 is not explained in this cell — confirm what it
# represents before reusing it.
DimDic = {
    brand: df[df.監測主題 == brand]["台股"].sum() / 81
    for brand in tqdm(brands)
}

for entry in DimDic.items():
    print(entry)

In [None]:
# Per-topic sum of the "台股" column, printed as (topic, value) pairs.
DimDic = {}

for brand in tqdm(brands):
    is_brand = df.監測主題 == brand
    DimDic[brand] = df.loc[is_brand, "台股"].sum()

for entry in DimDic.items():
    print(entry)

#### 3、圖表測試區

In [None]:
# Install matplotlib and import its pyplot interface for the charts below.

!pip install matplotlib
import matplotlib.pyplot as plt

In [None]:
# Print the names of the fonts matplotlib's font manager knows about, sorted
# alphabetically, one per line.

import matplotlib.font_manager

known_fonts = matplotlib.font_manager.fontManager.ttflist
a = sorted(font.name for font in known_fonts)

for font_name in a:
    print(font_name)

In [None]:
# Choose one of the fonts listed above as matplotlib's sans-serif font for the plots.
# NOTE(review): presumably picked so the Chinese labels render — confirm the font is installed on this machine.

plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']

In [None]:
# Optional pre-filter to run before plotting: keep rows whose "來源" is "新聞"
# and whose "主文/回文" is anything other than "主文".

is_news = df["來源"] == "新聞"
not_main_post = df["主文/回文"] != "主文"

# `ds` holds the filtered rows; preview them to check the filter is correct.
# NOTE(review): the plotting cell reads `df`, not `ds` — confirm which frame is intended there.

ds = df[is_news & not_main_post]
ds.head()

In [None]:
# Bar chart of the per-topic volume for one dimension.
# The dimension is named once in `target_dim` so the data query and the chart
# title cannot drift apart when a different dimension is plotted.

target_dim = "台股"

DimDic = {}

# Sum the chosen dimension column over the rows of each topic.
for i in tqdm(brands):
    DimDic[i] = df[df.監測主題 == i][target_dim].sum()

for m in DimDic.items():
    print(m)

xx = list(DimDic.keys())
yy = list(DimDic.values())

# alpha sets the bar opacity (0-1); color sets the bar colour.
plt1 = plt.bar(xx, yy, alpha=1, color='#f2cece')

plt.title("[ " + target_dim + " ]" + " 維度 x 各券商聲量直方圖")
plt.xlabel("券商品牌")
plt.ylabel("聲量則數")
plt.grid(axis='y', alpha=0.75)

# Label each bar with its value, horizontally centred at the bar's index and
# placed at the bar's height.
for position, height in enumerate(yy):
    plt.text(position, height, str(height), ha='center')

plt.show()