目標: 比較各族群在每個特徵的逐年變化。

In [None]:
import pandas as pd
import glob, os, pickle
#type
from dataclasses import dataclass
from typing import Optional, List
# plot
import plotly.express as ex
import plotly.graph_objects as go
from plotly.subplots import make_subplots
CMAP = ex.colors.qualitative.Vivid


In [None]:
def load_yearData(year):
    """Load one year's pickled group summary and stack it into one frame.

    Reads ``./data/group_feat_summary_{year}.pickle``. The unpickled object
    is indexed with ``'main'`` (a DataFrame) and with ``'r'`` and
    ``'src_dst'`` (mappings of key -> DataFrame). Each frame gets two extra
    columns before concatenation:

    * ``gp_cate`` -- ``'main'``, the ``'r'`` key as-is, or ``str()`` of the
      ``'src_dst'`` key;
    * ``gp_year`` -- the ``year`` argument.

    Frames are stacked row-wise in the order main, ``'r'`` items,
    ``'src_dst'`` items; the original row indexes are kept.

    NOTE: ``pickle.load`` can run arbitrary code, so only point this at
    trusted files.
    """
    path = f'./data/group_feat_summary_{year}.pickle'
    with open(path, 'rb') as fh:
        summary = pickle.load(fh)

    def tag(frame, label):
        # Stamp a frame with its group label and the year being loaded.
        return frame.assign(gp_cate=label, gp_year=year)

    frames = [tag(summary['main'], 'main')]
    frames.extend(tag(frame, key) for key, frame in summary['r'].items())
    frames.extend(
        tag(frame, str(key)) for key, frame in summary['src_dst'].items()
    )
    return pd.concat(frames, axis=0)


In [None]:
# Load the summaries for 2017 through 2020 and stack them into one frame.
holder = [load_yearData(y) for y in range(2017, 2021)]
data = pd.concat(holder)

In [None]:
# Feature lookup table; later cells read its `cate`, `variable_type`,
# `variable` and `year_type` columns to decide which rows to plot.
ft_names = pd.read_excel('./data/ft_names.xlsx')

In [None]:
# Pick which kind of chart to draw by feature category (`cate`).
ft_cate = 'basic'
ft = ft_names.query('cate==@ft_cate')
# Group labels (`gp_cate` values) to keep in the plots.
gp = ['main', 'KA', 'CA', 'BA', 'KB', 'JB']

# Keep rows whose variable_type, variable and year_type each appear in the
# selected feature table (the three columns are matched independently, not as
# a combined key), whose group is listed in `gp`, and whose `year` equals the
# `gp_year` tag added at load time.
# The trailing backslashes splice the conditions into a single line of query
# text. Inside DataFrame.query, `&` has the precedence of `and`, so the final
# `year == gp_year` comparison is evaluated before it is AND-ed with the rest.
plot_data = data.query("""
    variable_type.isin(@ft['variable_type']) &\
    variable.isin(@ft['variable']) &\
    year_type.isin(@ft['year_type']) &\
    gp_cate.isin(@gp) &\
    year == gp_year
""")

In [None]:
@dataclass
class line_attr:
    """Selection and styling settings for one line trace drawn by add_lines."""
    # Value matched against the `variable` column when selecting rows.
    variable: str
    # Value matched against the `gp_cate` column when selecting rows.
    cate: str
    # Trace name; add_lines falls back to `cate` when this is None.
    name: Optional[str] = None
    # Line colour handed to go.Scatter.
    color: Optional[str] = None
    # Forwarded to go.Scatter(visible=...); this file uses None or 'legendonly'.
    visible: Optional[str] = None
    # Forwarded to go.Scatter(legendgroup=...).
    legendgroup: Optional[str] = None
    # Forwarded to go.Scatter(showlegend=...).
    showlegend: bool = True
    # Forwarded to go.Scatter(opacity=...).
    opacity: float = 1

@dataclass
class area_attr:
    """Settings for one three-trace band (upper, lower, middle) drawn by add_areas."""
    # Value matched against the `gp_cate` column when selecting rows.
    cate: str
    # `variable` value for the upper boundary (drawn as the 'Pr75' trace).
    ceil_variable: str
    # `variable` value for the middle trace (drawn dashed).
    mid_variable: str
    # `variable` value for the lower boundary (drawn as the 'Pr25' trace,
    # filled up to the upper boundary).
    bottom_variable: str
    # Name of the middle trace and legend group of all three traces;
    # add_areas falls back to `cate` when this is None.
    name: Optional[str] = None
    # Line colour shared by all three traces.
    color: Optional[str] = None
    # Forwarded to go.Scatter(visible=...) for all three traces.
    # NOTE(review): annotated as str, but callers in this file also pass True.
    visible: Optional[str] = 'legendonly'
    # Applies to the two boundary traces only; add_areas always passes
    # showlegend=True for the middle trace.
    showlegend: bool = False
    # Opacity of the two boundary traces.
    opacity: float = .5
    # Opacity of the middle trace.
    mid_opacity: float = 1

def add_lines(fig, df, list_line_attr: List[line_attr], row, col):
    """Add one lines+markers trace to a subplot for every given line spec.

    Args:
        fig: Figure the traces are added to (via ``fig.add_trace``).
        df: Frame with ``variable``, ``gp_cate``, ``year`` and ``value``
            columns; ``year`` is plotted on x and ``value`` on y.
        list_line_attr: Specs naming the variable/group to select and how
            to style the resulting trace.
        row: Subplot row passed to ``fig.add_trace``.
        col: Subplot column passed to ``fig.add_trace``.
    """
    for l in list_line_attr:
        # Rows of this spec's feature for this spec's group.
        selected = df.query(" variable==@l.variable & gp_cate==@l.cate ")
        # An unnamed spec is labelled with its group.
        label = l.cate if l.name is None else l.name
        trace = go.Scatter(
            x=selected['year'],
            y=selected['value'],
            mode='lines+markers',
            name=label,
            line={'color': l.color},
            opacity=l.opacity,
            visible=l.visible,
            showlegend=l.showlegend,
            legendgroup=l.legendgroup,
        )
        fig.add_trace(trace, row=row, col=col)


def _area_trace(g, a, *, name, dash, opacity, showlegend, fill=None):
    """Build one lines+markers trace of a band from the already-filtered rows g."""
    # Only pass `fill` when it is requested, so the boundary/middle traces
    # that never set it are constructed exactly as before.
    extra = {} if fill is None else {'fill': fill}
    return go.Scatter(x=g['year'],
                      y=g['value'],
                      line={'color': a.color, 'dash': dash},
                      opacity=opacity,
                      mode='lines+markers',
                      name=name,
                      visible=a.visible,
                      showlegend=showlegend,
                      legendgroup=a.name if a.name is not None else a.cate,
                      **extra)


def add_areas(fig, df, list_area_attr: "List[area_attr]", row, col):
    """Add a filled band (upper, lower, middle traces) per given area spec.

    For every spec three traces are added, in this order:

    1. the upper boundary (``ceil_variable``), dotted, named ``'Pr75'``;
    2. the lower boundary (``bottom_variable``), dotted, named ``'Pr25'``,
       with ``fill='tonexty'`` so the fill reaches the trace added just
       before it -- the order of 1 and 2 must therefore not change;
    3. the middle line (``mid_variable``), dashed, named after the spec and
       always shown in the legend regardless of ``showlegend``.

    All three share one legend group (the spec's ``name``, else ``cate``).

    Args:
        fig: Figure the traces are added to (via ``fig.add_trace``).
        df: Frame with ``variable``, ``gp_cate``, ``year`` and ``value``
            columns; ``year`` is plotted on x and ``value`` on y.
        list_area_attr: ``area_attr`` specs (the previous annotation said
            ``line_attr``, which lacks the fields read here).
        row: Subplot row passed to ``fig.add_trace``.
        col: Subplot column passed to ``fig.add_trace``.
    """
    for a in list_area_attr:
        label = a.name if a.name is not None else a.cate
        g_ceil = df.query(" variable == @a.ceil_variable & gp_cate==@a.cate")
        g_bottom = df.query(" variable == @a.bottom_variable & gp_cate==@a.cate")
        g_mid = df.query(" variable == @a.mid_variable & gp_cate==@a.cate")
        traces = (
            _area_trace(g_ceil, a, name='Pr75', dash='dot',
                        opacity=a.opacity, showlegend=a.showlegend),
            _area_trace(g_bottom, a, name='Pr25', dash='dot',
                        opacity=a.opacity, showlegend=a.showlegend,
                        fill='tonexty'),
            _area_trace(g_mid, a, name=label, dash='dash',
                        opacity=a.mid_opacity, showlegend=True),
        )
        for trace in traces:
            fig.add_trace(trace, row=row, col=col)



In [None]:
# Per-group styling shared by both templates, in trace order:
# (group label, colour, `visible` for the line, `visible` for the band)
_GP_STYLE = (
    ('main', 'red', None, 'legendonly'),
    ('KA', CMAP[0], None, True),
    ('JB', CMAP[1], None, True),
    ('BA', CMAP[2], 'legendonly', 'legendonly'),
    ('KB', CMAP[3], 'legendonly', 'legendonly'),
    ('CA', CMAP[4], 'legendonly', 'legendonly'),
)

# Template line specs (one per group), defined on the 'age_Avg' feature.
LINES_GP = [
    line_attr('age_Avg', cate, name=f'{cate}(Avg)', color=color,
              visible=line_visible, legendgroup=cate)
    for cate, color, line_visible, _ in _GP_STYLE
]

# Template band specs (one per group), defined on the age percentiles.
AREAS_GP = [
    area_attr(cate, 'age_Pr75', 'age_Pr50', 'age_Pr25', name=cate,
              color=color, visible=area_visible)
    for cate, color, _, area_visible in _GP_STYLE
]

In [None]:
from copy import deepcopy

In [None]:
def make_lineAttr(feat: str, showlegend=True,
                  template: "Optional[List[line_attr]]" = None):
    """Return line specs for ``feat`` derived from a template list.

    The template is deep-copied, so neither it nor its elements are
    modified. On every copy ``variable`` is set to ``feat`` and
    ``showlegend`` to the given flag; all other fields are kept.

    Args:
        feat: Value to store in each spec's ``variable`` field.
        showlegend: Value to store in each spec's ``showlegend`` field.
        template: Specs to copy; defaults to the module-level ``LINES_GP``.

    Returns:
        The deep copy of the template with the two fields overwritten.
    """
    specs = deepcopy(LINES_GP if template is None else template)
    for spec in specs:
        spec.variable = feat
        spec.showlegend = showlegend
    return specs

def make_areaAttr(feat: str, showlegend=True,
                  template: "Optional[List[area_attr]]" = None):
    """Return band specs for ``feat`` derived from a template list.

    The template is deep-copied, so neither it nor its elements are
    modified. On every copy the three variable fields are pointed at
    ``{feat}_Pr75`` (ceil), ``{feat}_Pr50`` (mid) and ``{feat}_Pr25``
    (bottom), and ``showlegend`` is set to the given flag; all other fields
    are kept.

    Args:
        feat: Feature prefix used to build the three variable names.
        showlegend: Value to store in each spec's ``showlegend`` field.
        template: Specs to copy; defaults to the module-level ``AREAS_GP``.

    Returns:
        The deep copy of the template with the four fields overwritten.
    """
    ceil_var, mid_var, bot_var = f'{feat}_Pr75', f'{feat}_Pr50', f'{feat}_Pr25'
    specs = deepcopy(AREAS_GP if template is None else template)
    for spec in specs:
        spec.ceil_variable = ceil_var
        spec.mid_variable = mid_var
        spec.bottom_variable = bot_var
        spec.showlegend = showlegend
    return specs

In [None]:
def make_fig(fig, ):
    """Placeholder for a figure-assembly helper that has not been written.

    The definition previously consisted only of an unfinished header (no
    colon, no body), which is a syntax error. It is kept as a valid stub so
    the rest of the file can be parsed.

    Args:
        fig: Figure the helper is meant to work on (currently unused).

    Raises:
        NotImplementedError: Always, until the helper is implemented.
    """
    # TODO: implement, or remove this stub if the helper is not needed.
    raise NotImplementedError('make_fig is not implemented yet')

In [None]:
# Trace specs per feature. Only the order-count lines keep showlegend=True.
orders_lines_gp = make_lineAttr('n_orders')                        # order count
people_lines_gp = make_lineAttr('n_person', showlegend=False)      # head count
age_lines_gp = make_lineAttr('age_Avg', showlegend=False)          # age (average)
age_areas_gp = make_areaAttr('age', showlegend=False)              # age (percentile band)
female_lines_gp = make_lineAttr('female_rate', showlegend=False)   # female share
marr_lines_gp = make_lineAttr('marr_pct', showlegend=False)        # marital-status share

# 4x2 grid of subplots; the cells with an empty title receive no traces.
subplot_titles = ['訂單數', '人數', '年齡', '年齡(Prs)', '女性占比', '' , '未婚占比', '']
fig = make_subplots(4, 2, shared_yaxes=True, subplot_titles=subplot_titles)

# (drawing function, specs, row, col) in the order the traces are added.
placements = [
    (add_lines, orders_lines_gp, 1, 1),
    (add_lines, people_lines_gp, 1, 2),
    (add_lines, age_lines_gp, 2, 1),
    (add_areas, age_areas_gp, 2, 2),
    (add_lines, female_lines_gp, 3, 1),
    (add_lines, marr_lines_gp, 4, 1),
]
for draw, specs, at_row, at_col in placements:
    draw(fig, plot_data, specs, row=at_row, col=at_col)

fig.update_layout(height=550, hovermode='x unified')
fig

In [None]:
# Display the line specs built for the 'n_person' (head count) subplot.
people_lines_gp