## 라이브러리 로드

In [None]:
# Load the pandas library for data analysis.
# Load seaborn for data visualization.
import pandas as pd
import seaborn as sns

In [None]:
# Print the installed pandas and seaborn versions, one per line.
print(pd.__version__, sns.__version__, sep="\n")

2.1.4
0.13.1


## 데이터셋 불러오기
<img src="https://pandas.pydata.org/docs/_images/02_io_readwrite.svg">

* seaborn 데이터셋 위치 : https://github.com/mwaskom/seaborn-data

In [None]:
# Load the "mpg" example dataset through seaborn into df,
# then display the DataFrame's shape.
df = sns.load_dataset(name="mpg")
df.shape

(398, 9)

## 데이터셋 일부만 가져오기

In [None]:
# Preview the first 3 rows of df.
df.head(n=3)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320
2,18.0,8,318.0,150.0,3436,11.0,70,usa,plymouth satellite


In [None]:
# Preview the last 3 rows of df.
df.tail(n=3)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
395,32.0,4,135.0,84.0,2295,11.6,82,usa,dodge rampage
396,28.0,4,120.0,79.0,2625,18.6,82,usa,ford ranger
397,31.0,4,119.0,82.0,2720,19.4,82,usa,chevy s-10


In [None]:
# Draw 3 randomly chosen rows from df (the result differs on every run).
df.sample(n=3)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
45,18.0,6,258.0,110.0,2962,13.5,71,usa,amc hornet sportabout (sw)
364,26.6,8,350.0,105.0,3725,19.0,81,usa,oldsmobile cutlass ls
101,23.0,6,198.0,95.0,2904,16.0,73,usa,plymouth duster


## 매번 봐야 하는 기술통계값 코드 한 줄로 보기
### Pandas Profiling
* [ydataai/ydata-profiling (구 pandas-profiling): Create HTML profiling reports from pandas DataFrame objects](https://github.com/ydataai/ydata-profiling)

In [None]:
!pip install pandas pandas-profiling

Collecting visions==0.7.4 (from visions[type_image_path]==0.7.4->pandas-profiling)
  Using cached visions-0.7.4-py3-none-any.whl.metadata (5.9 kB)
Using cached visions-0.7.4-py3-none-any.whl (102 kB)
Installing collected packages: visions
  Attempting uninstall: visions
    Found existing installation: visions 0.7.6
    Uninstalling visions-0.7.6:
      Successfully uninstalled visions-0.7.6
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
ydata-profiling 4.10.0 requires visions[type_image_path]<0.7.7,>=0.7.5, but you have visions 0.7.4 which is incompatible.
Successfully installed visions-0.7.4


In [None]:
!pip uninstall pandas-profiling -y

# ydata-profiling 설치
!pip install -U ydata-profiling

Found existing installation: pandas-profiling 3.2.0
Uninstalling pandas-profiling-3.2.0:
  Successfully uninstalled pandas-profiling-3.2.0
Collecting visions<0.7.7,>=0.7.5 (from visions[type_image_path]<0.7.7,>=0.7.5->ydata-profiling)
  Using cached visions-0.7.6-py3-none-any.whl.metadata (11 kB)
Using cached visions-0.7.6-py3-none-any.whl (104 kB)
Installing collected packages: visions
  Attempting uninstall: visions
    Found existing installation: visions 0.7.4
    Uninstalling visions-0.7.4:
      Successfully uninstalled visions-0.7.4
Successfully installed visions-0.7.6


In [None]:
from ydata_profiling import ProfileReport

# Create a ProfileReport for df with the given report title.
profile = ProfileReport(
    df,
    title="Pandas Profiling Report",
)

In [None]:
# Viewing the profile report is inconvenient in Colab.

In [None]:
# Export the report as an HTML file. The relative path means the file is
# created where the Jupyter notebook is located (its working directory).
profile.to_file(output_file="pandas_profile_report.html")



Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

(using `df.profile_report(correlations={"auto": {"calculate": False}})`
If this is problematic for your use case, please report this as an issue:
https://github.com/ydataai/ydata-profiling/issues
(include the error message: 'Function <code object pandas_auto_compute at 0x79f184669790, file "/usr/local/lib/python3.10/dist-packages/ydata_profiling/model/pandas/correlations_pandas.py", line 167>')


Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

### sweetviz
* [sweetviz · PyPI](https://pypi.org/project/sweetviz/)
<img src="https://warehouse-camo.ingress.cmh1.psfhosted.org/210c4eb7317c8b0d2e7d0784b1a9697c096050ef/687474703a2f2f636f6f6c74696d696e672e636f6d2f53562f4c61796f75742d416e696d332e676966">

In [None]:
# sweetviz must be installed with the command below before it can be used.
!pip install sweetviz

Collecting sweetviz
  Downloading sweetviz-2.3.1-py3-none-any.whl.metadata (24 kB)
Downloading sweetviz-2.3.1-py3-none-any.whl (15.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.1/15.1 MB[0m [31m83.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sweetviz
Successfully installed sweetviz-2.3.1


In [None]:
import sweetviz as sv

# Analyze df with sweetviz. A target variable is optional: the report can be
# built without one, or with one specified. The target must be numeric or
# boolean, not categorical, and depending on the data even a numeric column
# may not work.
# Example with a target: sv.analyze(df, target_feat="mpg")
my_report = sv.analyze(source=df)

# Render the analysis as an HTML report.
my_report.show_html()

                                             |          | [  0%]   00:00 -> (? left)

Report SWEETVIZ_REPORT.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.
