- https://plotly.com/python/dot-plots/

In [1]:
import json
import pandas as pd
import numpy as np
import dash
import dash_core_components as dcc
import dash_html_components as html
import dash_bootstrap_components as dbc
import plotly.express as px
import plotly.graph_objects as go
from jupyter_dash import JupyterDash
import warnings; warnings.filterwarnings('ignore')

#### Data import

In [2]:
# Read the raw product records. The file is opened in binary mode and the
# bytes are handed to the json module for decoding.
with open('lipgloss_category.json', 'rb') as f:
    data = json.loads(f.read())

In [3]:
# Show the field names of the first loaded record.
data[0].keys()

dict_keys(['product_id', 'brand_id', 'brand_name', 'category_name', 'product_name', 'product_description', 'volume', 'product_price', 'product_rating_avg', 'review_count', 'created_at', 'modified_at', 'is_stable', 'color_type', 'tags', 'subcategory_name', 'seller'])

In [13]:
# Build a DataFrame from the loaded records and preview its first five rows.
data_df = pd.DataFrame(data)
data_df.head(5)

Unnamed: 0,product_id,brand_id,brand_name,category_name,product_name,product_description,volume,product_price,product_rating_avg,review_count,created_at,modified_at,is_stable,color_type,tags,subcategory_name,seller
0,130981,6019,롬앤,립글로스,글래스팅 워터 글로스,,4.5g,13000,4.36,446,2021-07-14T02:48:05.000Z,2021-07-14T02:48:05.000Z,0,,,,
1,56725,8,RMK,립글로스,립 젤리 글로스,,5.5g,30000,4.32,154,2021-07-14T02:48:05.000Z,2021-07-14T02:48:05.000Z,0,,,,
2,91694,6019,롬앤,립글로스,립매터,,8g,11000,4.14,262,2021-07-14T02:48:05.000Z,2021-07-14T02:48:05.000Z,0,,,,
3,6015,20,나스,립글로스,립 글로스,,8g,36000,4.13,70,2021-07-14T02:48:05.000Z,2021-07-14T02:48:05.000Z,0,,,,
4,60784,33,더샘,립글로스,샘물 세럼 립글로스,,4.5g,5500,4.02,85,2021-07-14T02:48:05.000Z,2021-07-14T02:48:05.000Z,0,,,,


#### 제품별 리뷰 수 분포
- 리뷰 수가 10개를 초과하는(11개 이상) 제품만 표시

In [17]:
# Keep only products whose review count is strictly greater than 10,
# order them from most to least reviewed, and renumber the index from 0.
data_isreview = (
    data_df.loc[data_df['review_count'] > 10]
    .sort_values(by='review_count', ascending=False)
    .reset_index(drop=True)
)

In [22]:
# Display each product name next to its review count.
data_isreview.loc[:, ['product_name', 'review_count']]

Unnamed: 0,product_name,review_count
0,페이스잇 레슨 아티스트 핑거 글로스,2985
1,글래스팅 워터 글로스,446
2,어딕트 립 맥시마이저,443
3,립매터,262
4,울트라 플러쉬 립 글로스(젤리 글로스),260
...,...,...
116,더 스타일 글램 펄 글로스,11
117,센슈얼 글로스,11
118,페이스잇리얼다이아몬드립글로스,11
119,퓨어 칼라 하이 글로스,11


#### 제품별 평균 평점 top/bottom 20
- 리뷰 수가 10개를 초과하는(11개 이상) 제품만 표시

In [6]:
# Shuffle the rows, then rank products by average rating (highest first) and
# renumber the index from 0.
# The shuffle is seeded via random_state so that a fresh "Restart & Run All"
# produces the same row order every time; without a seed the order changed
# from run to run.
# NOTE(review): presumably the shuffle exists so that products with equal
# ratings are not always listed in review-count order — confirm.
data_rating = (
    data_isreview.sample(frac=1, random_state=42)
    .sort_values(by=['product_rating_avg'], ascending=False)
    .reset_index(drop=True)
)

In [7]:
# Keep the first 20 and the last 20 rows of the rating-sorted frame and
# renumber the combined rows from 0.
data_rating = pd.concat(
    [data_rating.iloc[:20], data_rating.iloc[-20:]],
    ignore_index=True,
)

#### 브랜드 분포(top 20)

In [25]:
# Count how many products each brand has and keep the 20 most frequent brands.
# Result columns: 'freq' (number of products) and 'brand' (brand name).
#
# value_counts() labels its result differently across pandas versions (the
# counts column was named after the source column before pandas 2.0 and is
# named 'count' from 2.0 on), so renaming 'brand_name' -> 'freq' silently did
# nothing on newer versions and the later lookup of 'freq' failed. The column
# names are therefore assigned explicitly here.
# value_counts() already returns counts in descending order, so no additional
# sort is needed before taking the first 20 rows.
brand_freq = (
    data_df['brand_name']
    .value_counts()
    .rename_axis('brand')
    .reset_index(name='freq')
    .loc[:, ['freq', 'brand']]
    .head(20)
)

#### Top 20 브랜드 중 브랜드별 제품 평균 가격

In [29]:
# Restrict the products to those whose brand appears in the top-20 brand table.
data_brand = data_df.loc[
    data_df['brand_name'].isin(brand_freq['brand'])
]

In [30]:
# Mean product price per brand, ordered from the lowest to the highest mean.
brand_price = (
    data_brand.groupby('brand_name')['product_price']
    .mean()
    .reset_index()
    .sort_values(by='product_price')
)
# Preview the ten brands with the lowest mean price.
brand_price.head(10)

Unnamed: 0,brand_name,product_price
0,NYX,2733.333333
14,어퓨,5816.666667
15,에뛰드,7450.0
11,스킨푸드,7800.0
3,더샘,7940.0
19,홀리카홀리카,8150.0
17,토니모리,8357.142857
4,더페이스샵,8590.0
8,미샤,10288.888889
9,바닐라코,11400.0


#### 시각화 실행

In [31]:
# To preview inside Jupyter instead, construct the app with JupyterDash:
# app = JupyterDash(external_stylesheets=[dbc.themes.SUPERHERO])
app = dash.Dash(
    external_stylesheets=[dbc.themes.SUPERHERO],
)

# Scatter plot: average rating of the products held in data_rating
# (the top/bottom-20 selection), coloured by the rating itself.
fig = px.scatter(
    data_rating,
    x='product_name',
    y='product_rating_avg',
    color='product_rating_avg',
    labels={'product_rating_avg': '평점'},
)

# Bar chart: review counts of the first 30 rows of data_isreview, coloured by
# the natural log of the review count.
fig2 = px.bar(
    data_isreview.iloc[:30],
    x='product_name',
    y='review_count',
    barmode='group',
    color=np.log(data_isreview.iloc[:30]['review_count']),
    labels={'color': '리뷰 수(log)'},
)

# Pie chart: share of products per brand.
fig3 = px.pie(
    brand_freq,
    values='freq',
    names='brand',
    hover_data=['brand'],
)

# Dot plot: mean product price (x) per brand (y).
fig4 = go.Figure()
fig4.add_trace(
    go.Scatter(
        x=brand_price['product_price'],
        y=brand_price['brand_name'],
        marker=dict(
            color='rgba(157, 137, 236, 1)',
            line_color='rgba(156, 165, 196, 1.0)',
        ),
    )
)

# Layout of the rating chart: centred title, axis titles, font, figure size
# and background colours.
fig.update_layout(
    title=dict(text='제품별 평균 평점 수 Top / Bottom 20', x=0.5),
    title_font=dict(color="white", size=22),
    xaxis_title="제품명",
    yaxis_title="평균 평점",
    font=dict(family="Malgun Gothic", size=10, color="White"),
    width=1370,
    height=600,
    plot_bgcolor='rgba(219, 242, 239, 0.8)',  # plotting-area background
    paper_bgcolor='rgba(7, 45, 77, 1)',       # whole-figure background
)

# Product names on the x axis are long, so the tick labels are rotated 45 degrees.
fig.update_xaxes(tickangle=45, color='white')
fig.update_yaxes(color='white')
fig.update_traces(marker_size=12)

# Layout of the review-count bar chart: centred title, axis titles, font,
# figure size and background colours.
fig2.update_layout(
    title=dict(text='제품별 리뷰 수 Top 30', x=0.5),
    title_font=dict(color="white", size=22),
    xaxis_title="제품명",
    yaxis_title="리뷰 수(log)",
    font=dict(family="Malgun Gothic", size=10, color="White"),
    width=1370,
    height=600,
    plot_bgcolor='rgba(219, 242, 239, 0.8)',
    paper_bgcolor='rgba(7, 45, 77, 1)',
)
# The y axis uses a logarithmic scale; x tick labels are rotated 45 degrees.
fig2.update_yaxes(type="log", color="white")
fig2.update_xaxes(tickangle=45, color="white")

# Show percentage and label inside each pie slice.
fig3.update_traces(textposition='inside', textinfo='percent+label')

# Layout of the brand pie chart, applied in a single call: centred title,
# margins, hidden legend (its position is still configured), and background.
fig3.update_layout(
    title=dict(text='브랜드 분포', x=0.5),
    title_font=dict(color="white", size=22),
    margin=dict(t=70, b=50, l=5, r=5),
    showlegend=False,
    legend=dict(yanchor="top", y=0.98, xanchor="left", x=0),
    paper_bgcolor='rgba(7, 45, 77, 1)',
)

# Draw the brand-price trace as circular markers with a thin outline.
fig4.update_traces(
    mode='markers',
    marker=dict(line_width=1, symbol='circle', size=16),
)

# Layout of the brand-price chart, applied in a single call.
# NOTE(review): the x values are the raw mean prices from brand_price; the
# '(천원)' unit in the axis title presumably relies on Plotly abbreviating
# ticks such as 5000 to '5k' — confirm the unit label is what is intended.
fig4.update_layout(
    title=dict(text='브랜드별 제품 평균 가격', x=0.5),
    title_font=dict(color="white", size=22),
    xaxis=dict(
        showgrid=True,
        showline=True,
        linecolor='rgb(102, 102, 102)',
        tickfont_color='rgb(255,255,255)',
        showticklabels=True,
        dtick=5000,
        ticks='outside',
        tickcolor='rgb(102, 102, 102)',
        title='평균 가격(천원)',
    ),
    margin=dict(l=10, r=30, b=50, t=80),
    legend=dict(font_size=10, yanchor='middle', xanchor='right'),
    width=800,
    height=600,
    paper_bgcolor='rgba(7, 45, 77, 1)',
    plot_bgcolor='rgba(180, 228, 250, 0.35)',
    hovermode='closest',
)
fig4.update_yaxes(color="white")
fig4.update_xaxes(color="white")

# Page layout: a centred heading followed by three rows of charts
# (rating scatter; brand pie + brand price dot plot; review-count bar chart),
# separated by line breaks.
app.layout = dbc.Container([
    html.Br(),
    html.H3(children="글로우픽 '립글로스' 제품군 시각화 분석", style={'textAlign': 'center'}),
    html.Br(),
    html.Br(),

    # Row 1: average-rating scatter plot.
    dbc.Row([
        dbc.Col([dcc.Graph(id='rating-dot-chart', figure=fig, style={'width': '100%'})]),
    ]),
    html.Br(),

    # Row 2: brand distribution pie chart and brand mean-price dot plot.
    # NOTE(review): the price chart sits in a width=1 column while its figure
    # is configured 800px wide — confirm this column width is intentional.
    dbc.Row([
        dbc.Col([dcc.Graph(id='brand-pie-chart', figure=fig3)], width=6),
        dbc.Col([dcc.Graph(id='brand-price-chart', figure=fig4)], width=1),
    ]),
    html.Br(),

    # Row 3: review-count bar chart.
    # Dash style dictionaries use camelCased CSS property names, so the key is
    # 'marginLeft' rather than the hyphenated CSS spelling.
    dbc.Row([
        dbc.Col([
            dcc.Graph(
                id='reivewnum-bar-chart',
                figure=fig2,
                style={'marginLeft': 'auto', 'width': '100%'},
            ),
        ]),
    ]),
    html.Br(),
])

# Start the Dash development server exactly once.
# Previously three run_server calls were issued back to back: each call blocks
# until its server stops, so they would run one after another on different
# ports, and the first one passed mode='inline', which is a JupyterDash option
# rather than something the dash.Dash app built in this cell accepts.
# use_reloader=False keeps the debug server from trying to re-launch the
# hosting process.
# If the app is switched to JupyterDash for in-notebook preview, use
# app.run_server(mode='inline', port=8500) instead.
if __name__ == '__main__':
    app.run_server(debug=True, port=8500, use_reloader=False)

Dash is running on http://127.0.0.1:8500/

Dash is running on http://127.0.0.1:8500/

Dash is running on http://127.0.0.1:8500/

Dash is running on http://127.0.0.1:8500/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
