<a href="https://colab.research.google.com/github/zzhining/data_visualization/blob/main/tips.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 환경설정
- [seaborn](https://seaborn.pydata.org/api.html)
- [ydata-profiling](https://github.com/ydataai/ydata-profiling)



In [None]:
pip install ydata-profiling

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Load seaborn's "tips" example dataset and preview the first five rows.
tips = sns.load_dataset(name="tips")
tips.head(n=5)

# 기술 통계

## 수치형 데이터의 기술 통계

In [None]:
# Descriptive statistics for the numeric columns (describe() with no
# arguments summarises numeric columns only).
tips.describe()


## 범주형 데이터의 기술 통계
`tips.describe(include = 'all')`

In [None]:
# Descriptive statistics including the categorical columns: include="all"
# adds count / unique / top / freq for non-numeric columns next to the
# numeric summary.
tips.describe(include="all")


# 데이터 시각화

In [None]:
# Build an automated profiling report for the tips frame. The import stays in
# this cell next to its only use; the bare name on the last line hands the
# report object to the notebook for display.
from ydata_profiling import ProfileReport

profile = ProfileReport(df=tips, title="Profiling Report")
profile

## 1. 변수간 상관관계
---



### 1) Relational plots

    relplot
    scatterplot
    lineplot


In [None]:
# Figure-level relational plot of tip against total_bill, drawn as a scatter.
sns.relplot(data=tips, x="total_bill", y="tip", kind="scatter")

In [None]:
# Scatter plot of tip against total_bill.
sns.scatterplot(data=tips, x="total_bill", y="tip")

In [None]:
# Same scatter, with marker colour keyed to the "time" column.
ax = sns.scatterplot(data=tips, x="total_bill", y="tip", hue="time")

In [None]:
# Colour keyed to "day" and marker style keyed to "time".
ax = sns.scatterplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="day",
    style="time",
)

In [None]:
# Marker size keyed to the "size" column.
ax = sns.scatterplot(data=tips, x="total_bill", y="tip", size="size")

In [None]:
# Encode the "size" column twice: by colour, through a cubehelix colormap,
# and by marker area.
cmap = sns.cubehelix_palette(dark=.3, light=.8, as_cmap=True)
ax = sns.scatterplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="size",
    size="size",
    # (min, max) marker area. A minimum of 0 would give the smallest "size"
    # value zero-area markers, i.e. invisible points, so keep it positive.
    sizes=(20, 200),
    palette=cmap,
    legend="full",
)

In [None]:
# Line plot of tip against total_bill.
sns.lineplot(data=tips, x="total_bill", y="tip")

In [None]:
# Side-by-side comparison on one figure: scatter on the first axes,
# line plot on the second.
fig, ax = plt.subplots(nrows=1, ncols=2)
sns.scatterplot(data=tips, x="total_bill", y="tip", ax=ax[0])
sns.lineplot(data=tips, x="total_bill", y="tip", ax=ax[1])




### 2) Regression plots

    lmplot
    regplot
    residplot



In [None]:
# Regression plot of tip against total_bill.
sns.regplot(data=tips, x="total_bill", y="tip")

In [None]:
# Same regression plot, with the line coloured red through line_kws.
sns.regplot(
    data=tips,
    x="total_bill",
    y="tip",
    line_kws=dict(color="red"),
)

In [None]:
# Residual plot for tip against total_bill.
sns.residplot(data=tips, x="total_bill", y="tip")

In [None]:
# Stack three views of the same tip ~ total_bill relationship in one figure:
# raw scatter, regression fit, and residuals.
# The figure is bound to `fig` rather than `_` (which IPython uses for the
# last cell output), and the axes array gets a descriptive name.
fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(20, 12))
sns.scatterplot(data=tips, x="total_bill", y="tip", ax=axes[0])
sns.regplot(data=tips, x="total_bill", y="tip", ax=axes[1])
sns.residplot(data=tips, x="total_bill", y="tip", ax=axes[2])



## 2. 데이터 분포(Distribution plots)

    histplot
    kdeplot
    rugplot



In [None]:
# Histogram of the total_bill column.
sns.histplot(data=tips, x="total_bill")

In [None]:
# Kernel density estimate of the total_bill column.
sns.kdeplot(data=tips, x="total_bill")

In [None]:
# Rug plot of the total_bill column.
sns.rugplot(data=tips, x="total_bill")



## 3. 범주형 데이터(Categorical plots)

    catplot
    stripplot
    swarmplot
    boxplot
    violinplot
    boxenplot
    pointplot
    barplot
    countplot



In [None]:
# Swarm plot of total_bill grouped by day.
sns.swarmplot(data=tips, x="day", y="total_bill")

In [None]:
# Violin plot of total_bill grouped by day.
sns.violinplot(data=tips, x="day", y="total_bill")

In [None]:
# Boxen (letter-value) plot of total_bill grouped by day.
sns.boxenplot(data=tips, x="day", y="total_bill")

In [None]:
# Point plot of total_bill grouped by day.
sns.pointplot(data=tips, x="day", y="total_bill")

In [None]:
# Bar plot of total_bill grouped by day.
sns.barplot(data=tips, x="day", y="total_bill")

In [None]:
# Count of rows per day.
sns.countplot(data=tips, x="day")



## 4. 매트릭스 형태(Matrix plots)

    heatmap
    clustermap



In [None]:
# Pairwise Pearson correlation between the numeric columns only.
tips.corr(method="pearson", numeric_only=True)

In [None]:
# Heatmap of the numeric-column correlation matrix, with each cell annotated
# by its value.
sns.heatmap(
    data=tips.corr(numeric_only=True),
    annot=True,
)

In [None]:
# Cluster map of the numeric-column correlation matrix, with each cell
# annotated by its value.
sns.clustermap(
    data=tips.corr(numeric_only=True),
    annot=True,
)



## 5. 여러 개의 그래프를 한번에 그리기(Multi-plot)

    FacetGrid
    pairplot
    jointplot

In [None]:
# Pairwise relationship grid over the tips frame.
sns.pairplot(data=tips)

In [None]:
# Facet on "time": `row` / `col` take the names of categorical variables in
# `data` that determine the faceting of the grid. Colour is keyed to "smoker".
# The trailing semicolon suppresses the returned object's repr.
sns.relplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="smoker",
    col="time",
);