In [40]:
import pickle 
import pandas as pd
from functools import partial

# Columns removed from every plot-data CSV before the tables are displayed.
_META_COLS = ["variable_type", "variable", "cate", "year_type"]


def _load_feature_table(csv_path):
    """Read one plot-data CSV, drop the columns in ``_META_COLS`` and index it by ``ft_name``."""
    return pd.read_csv(csv_path).drop(columns=_META_COLS).set_index("ft_name")


# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load radar_dict.pickle when it comes from a trusted source.
with open('./radar_dict.pickle', 'rb') as f:
    radar_dict = pickle.load(f)

plot_data = _load_feature_table('plotData_main.csv')
ib_data = _load_feature_table('plotData_main_iinsbuy.csv')
ct_data = _load_feature_table('plotData_main_cartype.csv')

def row_enhance(row, compare_row):
    """Return one CSS string per cell of ``row`` for use with ``Styler.apply(axis=1)``.

    Parameters
    ----------
    row : pandas.Series
        The row being styled; ``row.name`` is its index label.
    compare_row : hashable
        Label of the row that should be highlighted.

    Returns
    -------
    list of str
        The highlight style for every cell when ``row.name == compare_row``,
        otherwise the neutral style for every cell.
    """
    is_target = row.name == compare_row
    cell_css = "background-color:#A0B0AA ; color:black" if is_target else "background-color:None "
    # Same style for every cell in the row.
    return [cell_css] * len(row.values)

def custom_formatter(styler):
    """Apply the notebook's standard number formats to ``styler``.

    Columns listed in the format map get their own format string; every other
    column falls back to two-decimal precision.

    Parameters
    ----------
    styler : object exposing ``format(precision=..., formatter=...)``
        In this notebook, a ``pandas`` Styler whose columns are feature names.

    Returns
    -------
    Whatever ``styler.format`` returns.
    """
    column_formats = {
        "女性占比": '{:.1%}',
        "Avg年齡": '{:,.1f}',
        "未婚占比": '{:.1%}',
        "保費/人": '{:,.0f}',
        "總賠付/總保費": '{:.1%}',
        "賠付金/人": '{:,.0f}',
    }
    return styler.format(precision=2, formatter=column_formats)

def compare_table(data, base, compare, default_cols=("main",)):
    """Build a styled table comparing the ``compare`` group against the ``base`` group.

    Parameters
    ----------
    data : pandas.DataFrame
        Features on the index, one column per group.
    base : str
        Column label of the reference group (the denominator of the ratio).
    compare : str
        Column label of the group being compared; its row is highlighted.
    default_cols : sequence of str, optional
        Extra group columns shown for context ahead of ``base`` and ``compare``.

    Returns
    -------
    pandas Styler
        Transposed table (one row per group) plus a final row labelled
        ``"{compare}/{base}-1"`` holding the relative difference
        ``(compare - base) / base``, rendered as a percentage and shaded with
        the ``coolwarm`` colormap.
    """
    delta_label = f"{compare}/{base}-1"

    # dict.fromkeys removes repeats while keeping order, so passing a base/compare
    # that is already in default_cols does not create duplicate column labels
    # (which would make table[base] a DataFrame and break the arithmetic below).
    columns = list(dict.fromkeys([*default_cols, base, compare]))
    table = data[columns].copy()
    table[delta_label] = (table[compare] - table[base]) / table[base]

    highlight = partial(row_enhance, compare_row=compare)
    # After the transpose the delta is a row, so it is addressed as (row label, all columns).
    delta_cells = (delta_label, table.index[:])

    # Shared per-feature formats; the delta row is re-formatted as a percentage afterwards.
    styled = custom_formatter(table.T.style.apply(highlight, axis=1))
    return styled.background_gradient(
        cmap="coolwarm",
        subset=delta_cells,
        axis=1,
    ).format('{:.1%}', subset=delta_cells, na_rep="-")  # na_rep avoids rendering "nan%"

## 目標: 比較不同族群之間的特徵

討論範圍:
1. 統計年 = 2020
2. 險種 = 買過車險(不含強制險)的客戶

所謂不同族群:
1. **不考慮**前一年使用的通路，比較使用不同通路的族群。 e.g. 在2020年的客戶中，比較使用JB v.s.使用KA的族群在特徵有甚麼不同。(即by通路)
2. **考慮**前一年使用的通路 & 今年使用的通路，比較去年使用同一通路，但今年改變通路的族群。 e.g. KA→KA的族群(未改變行為) v.s. KA→JB的族群(行為發生改變)

特徵列表:

**1. group1:**
| 特徵            | 邏輯                                                                      | 時間範圍    |
|:----------------|:---------------------------------------------------------------------------|:---------|
| 女性占比        | 女性人數/族群人數                                                          |  2020      |
| Avg年齡         | 族群平均年齡(每人)                                                         |  2020      |
| 未婚占比        | 未婚人數/族群人數                                                          |  2020      |
| Avg車齡         | 車齡平均(保單)                                                             |  2020      |
| Avg車數         | 車數平均(每人)                                                             | 2017~2020  |
| Avg通路慣性     | 切換通路前，連續使用同一個通路的次數(若沒切換則繼續累計) <br>(例一:KA,KA,KB=2; 例二:KB,KB,KB,KB=4) |  2017~2020    |
| 保費/人         | 總保費/族群人數                                                            |  2020      |
| 總賠付/總保費   | 即損率 (不只限於出險的人，而是族群所有人)                                          |  2020      |
| 賠付金/人       | 總賠付/族群中有出險的人數                                                    |  2020      |

**2. group2: 曾買過其他險別的人數佔比(2017~2020)**

**3. group3: 今年投保的車種百分比(2020)**


*討論:* **Avg連續通路使用**(即上表的「Avg通路慣性」)可能的定義方向:
1. 使用最多次的通路
2. 轉換前連續使用通路 (若沒轉換繼續累計)

***

圖表說明:
1. 每條曲線表示 **族群**:
    - **main**: 2020年所有買過車險的客戶
    - **KA**: 2020年所有使用KA買過車險的客戶
    - 以此類推
2. 雷達圖:
    - **半徑**: 該族群的特徵值相對於**main**的倍數。 例: **未婚占比_JB / 未婚占比_main** = 29.4% / 46.6% ≈ **0.63**
    <br>(i.e. 把特徵分數標準化，讓所有特徵有相同的尺度)
    - <span style="color:#F6A198">**紅色範圍**</span>: 以**全體**(main)作為比較基準，半徑為1。
    
    <small>note: 半徑大，但其實兩者都很小是有可能發生的。 e.g. KA曾買過住火的占比 / main曾買過住火的占比 = 3.2% / 1.9% = 1.68。</small>

![intro](../../img/%E5%9C%96%E8%A1%A8%E8%AA%AA%E6%98%8E.png)




---
### **不考慮**前一年使用的通路

例: 2020年使用 {JB} 買過車險的客戶 (不管他們2019使用的是什麼通路)

1. **JB**相較其他族群:

|             |                      |
|:------------|:--------------------|
| **未婚占比** | 最低，29% v.s. 47%<small>(main)</small> |
|**保費/人**   | 最低，6,590 v.s. 7,754<small>(main)</small>, 10,814<small>(CA)</small>|
|**賠付金/人** | 偏低，48,927 v.s. 54,697<small>(main)</small> |
|**總賠付/總保費** | 最高，65% v.s. 59%<small>(main)</small>, 53%<small>(CA)</small>|
|**車種** | 以小客車(53%)、機車(34%)為主 |


2. 明顯特徵:

| | |
|:---|:---|
|**未婚占比** | BA(64.5%)、KB(60.7%)明顯較高; CA(34.0%)、JB(29.4%)則明顯偏低。 註: main(46.6%)|
|**Avg車齡** | 多在8~9年，以KA(9.78年)最高、CA(5.93年)最低。|
|**Avg車數** | 多在1.3台左右，以KA(1.43台)最高、CA(1.18台)最低。|
|**Avg通路慣性** | 多在3次以下，僅CA(3.58次)、KA(3.47次)超過3次。值得注意的是，JB(2.9次)是黏著度第三名。|
|**保費/人** | CA(10,814)最高，JB(6,590)最低|


In [8]:
# By-channel feature table; reuses the shared formatter instead of repeating
# its format map, and highlights the JB row.
custom_formatter(
    plot_data[["main", "CA", "KA", "JB", "BA", "KB"]].T.style
).apply(lambda row: row_enhance(row, "JB"), axis=1)

ft_name,女性占比,Avg年齡,未婚占比,Avg車齡,Avg車數,Avg連續通路使用,保費/人,總賠付/總保費,賠付金/人
main,50.1%,46.7,46.6%,8.35,1.29,3.11,7754,59.2%,54697
CA,51.5%,49.8,34.0%,5.93,1.18,3.58,10814,52.7%,56430
KA,49.9%,48.0,45.8%,9.78,1.43,3.47,8250,62.9%,55106
JB,50.5%,45.5,29.4%,8.93,1.35,2.9,6590,64.8%,48927
BA,52.8%,45.9,64.5%,8.91,1.32,2.77,7349,61.3%,57302
KB,48.4%,45.2,60.7%,8.72,1.36,2.8,6651,60.2%,57634


In [9]:
# Note: these are "ever bought" shares (customers who have ever purchased the line).
(
    ib_data[["main", "CA", "KA", "JB", "BA", "KB"]]
    .T.style
    .format(precision=2, formatter='{:.1%}')
    .apply(lambda row: row_enhance(row, "JB"), axis=1)
)

ft_name,車險(任意),住火,團傷,個傷,旅平
main,100.0%,1.9%,2.1%,2.5%,5.0%
CA,100.0%,1.0%,1.7%,1.0%,2.6%
KA,100.0%,3.2%,2.8%,3.8%,7.3%
JB,100.0%,1.9%,1.7%,1.6%,6.9%
BA,100.0%,2.1%,1.9%,3.8%,4.3%
KB,100.0%,1.7%,2.8%,2.9%,3.8%


In [10]:
# Car-type shares per channel group, shown as percentages; the JB row is highlighted.
(
    ct_data[["main", "CA", "KA", "JB", "BA", "KB"]]
    .T.style
    .format(precision=2, formatter='{:.1%}')
    .apply(lambda row: row_enhance(row, "JB"), axis=1)
)

ft_name,小客車,機車,進口車,小貨車,未填,電動自行車,其他,大型重機
main,50.3%,25.9%,13.6%,5.1%,3.2%,0.7%,0.6%,0.5%
CA,58.7%,2.5%,27.1%,11.4%,0.1%,0.0%,0.1%,0.1%
KA,54.0%,29.1%,11.2%,4.0%,0.1%,0.0%,0.7%,0.9%
JB,53.2%,33.8%,10.4%,1.5%,0.2%,0.0%,0.0%,0.8%
BA,43.5%,40.3%,7.7%,3.5%,0.2%,1.8%,2.4%,0.5%
KB,42.0%,40.0%,10.5%,5.5%,0.2%,0.9%,0.5%,0.5%


In [12]:
# Display the 'agg' entry of the unpickled radar_dict.
# NOTE(review): presumably the radar chart for the by-channel groups — confirm.
radar_dict['agg']


***
### **考慮**前一年使用的通路

例: 2019年使用 {KA} 買過車險的客戶，2020年轉移去使用 {JB, KB, CA, ...} 的客戶特徵有甚麼不同？


##### - 從KA出發
KA→JB(改變行為) v.s. KA→KA(未改變行為):

In [47]:
# Detailed comparison figures: ('KA', 'JB') against the ('KA', 'KA') baseline,
# with "main" shown for context.
base = "('KA', 'KA')"
compare = "('KA', 'JB')"
compare_table(plot_data, base, compare, default_cols=["main"])
# Alternative baseline (the whole JB group):
# compare_table(plot_data, "JB", "('KA', 'JB')")

ft_name,女性占比,Avg年齡,未婚占比,Avg車齡,Avg車數,Avg連續通路使用,保費/人,總賠付/總保費,賠付金/人
main,50.1%,46.7,46.6%,8.35,1.29,3.11,7754,59.2%,54697
"('KA', 'KA')",50.6%,49.2,41.3%,10.39,1.49,4.10,8157,61.1%,53146
"('KA', 'JB')",51.1%,47.6,30.7%,9.84,1.95,2.20,9357,59.7%,46167
"('KA', 'JB')/('KA', 'KA')-1",1.0%,-3.1%,-25.7%,-5.4%,31.1%,-46.3%,14.7%,-2.4%,-13.1%


In [55]:
# Same comparison on the other-insurance purchase table.
base = "('KA', 'KA')"
compare = "('KA', 'JB')"
compare_table(ib_data, base, compare)
# Alternative baseline (the whole JB group):
# compare_table(ib_data, "JB", "('KA', 'JB')")

ft_name,車險(任意),住火,團傷,個傷,旅平
main,1.00,0.02,0.02,0.03,0.05
"('KA', 'KA')",1.00,0.04,0.03,0.04,0.08
"('KA', 'JB')",1.00,0.03,0.03,0.03,0.09
"('KA', 'JB')/('KA', 'KA')-1",0.0%,-9.3%,2.9%,-33.5%,10.8%


In [51]:
# Same comparison on the car-type table.
base = "('KA', 'KA')"
compare = "('KA', 'JB')"
compare_table(ct_data, base, compare)
# Alternative baseline (the whole JB group):
# compare_table(ct_data, "JB", "('KA', 'JB')")

ft_name,小客車,機車,進口車,小貨車,未填,電動自行車,其他,大型重機
main,0.50,0.26,0.14,0.05,0.03,0.01,0.01,0.01
"('KA', 'KA')",0.56,0.26,0.12,0.04,0.00,0.00,0.01,0.01
"('KA', 'JB')",0.55,0.31,0.10,0.02,0.00,,0.00,0.02
"('KA', 'JB')/('KA', 'KA')-1",-3.3%,17.7%,-10.1%,-47.7%,8.3%,nan%,-54.5%,78.2%


In [4]:
# Graphical view (useful when comparing many groups at once)
radar_dict["from"]["KA"]

特徵清單

In [34]:
# Raw string so that `\(` reaches the regex engine as an escaped parenthesis
# instead of being an invalid string escape. Matches labels starting with "('KA".
target = r"^\('KA.*"

In [35]:
# Feature table for every column whose label matches `target`;
# the ('KA', 'JB') row is highlighted.
custom_formatter(
    plot_data.filter(regex=target).T.style
).apply(lambda row: row_enhance(row, "('KA', 'JB')"), axis=1)

ft_name,女性占比,Avg年齡,未婚占比,Avg車齡,Avg車數,Avg連續通路使用,保費/人,總賠付/總保費,賠付金/人
"('KA', 'CA')",55.9%,49.5,28.8%,7.02,2.23,1.83,16083,59.1%,60670
"('KA', 'KA')",50.6%,49.2,41.3%,10.39,1.49,4.1,8157,61.1%,53146
"('KA', 'JB')",51.1%,47.6,30.7%,9.84,1.95,2.2,9357,59.7%,46167
"('KA', 'BA')",53.8%,47.6,48.6%,9.45,2.02,2.04,10647,54.9%,51287
"('KA', 'KB')",52.7%,48.8,41.5%,10.08,2.13,1.93,11298,72.1%,66099


In [36]:
# Other-insurance purchase shares for every column matching `target`;
# the ('KA', 'JB') row is highlighted.
(
    ib_data.filter(regex=target)
    .T.style
    .format(precision=2, formatter="{:.1%}")
    .apply(lambda row: row_enhance(row, "('KA', 'JB')"), axis=1)
)

ft_name,車險(任意),住火,團傷,個傷,旅平
"('KA', 'CA')",100.0%,3.9%,2.7%,4.7%,11.5%
"('KA', 'KA')",100.0%,3.6%,2.8%,4.2%,7.9%
"('KA', 'JB')",100.0%,3.3%,2.9%,2.8%,8.8%
"('KA', 'BA')",100.0%,4.6%,2.9%,5.3%,7.5%
"('KA', 'KB')",100.0%,4.4%,3.4%,5.1%,8.3%


In [37]:
# Car-type shares for every column matching `target`;
# the ('KA', 'JB') row is highlighted.
(
    ct_data.filter(regex=target)
    .T.style
    .format(precision=2, formatter="{:.1%}")
    .apply(lambda row: row_enhance(row, "('KA', 'JB')"), axis=1)
)

ft_name,小客車,機車,進口車,小貨車,未填,電動自行車,其他,大型重機
"('KA', 'CA')",58.0%,13.6%,20.7%,7.0%,0.1%,0.0%,0.0%,0.6%
"('KA', 'KA')",56.5%,26.3%,11.6%,4.1%,0.1%,0.0%,0.6%,0.9%
"('KA', 'JB')",54.6%,31.0%,10.4%,2.1%,0.1%,nan%,0.3%,1.5%
"('KA', 'BA')",47.6%,36.0%,8.4%,4.4%,0.2%,1.1%,1.0%,1.3%
"('KA', 'KB')",49.4%,31.8%,10.9%,6.2%,0.1%,0.2%,0.5%,0.9%


- 人數
- 出險人數佔比
- 車數PR75