In [2]:
import plotly
import cufflinks as cf
import pandas as pd
import numpy as np

In [3]:
# Report the installed version of each imported library, one per line,
# in the order plotly, cufflinks, pandas, numpy.
for _lib in (plotly, cf, pd, np):
    print(_lib.__version__)

4.2.1
0.17.3
1.0.3
1.18.1


In [4]:
# Enable offline mode so interactive plots can be rendered locally.
# NOTE(review): download_plotlyjs, plot and iplot are imported here but are not
# called in the cells shown; kept because other cells may rely on them.
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
init_notebook_mode(connected=True)
# Switch cufflinks itself to offline mode as well.
cf.go_offline()

### 데이터 생성 및 plot

In [7]:
# Preview only: draw a 10 x 4 array of standard-normal samples.
# The result is displayed but not stored in a variable.
np.random.standard_normal(size=(10, 4))

array([[-0.35249903, -1.07759201,  0.26263484,  0.2895989 ],
       [ 0.86324092,  0.77061561, -1.69657985,  1.18028326],
       [-0.4753461 ,  1.76619765, -0.68311338, -0.6547885 ],
       [ 0.89364616, -0.00959494, -0.75803566,  0.34940352],
       [ 0.54138098, -1.04479811,  1.40144903,  0.56115544],
       [-0.52100082, -1.56904141, -0.67535622, -0.13744128],
       [ 0.28774785,  0.97404077, -0.15263539,  0.77172215],
       [-0.71986984,  0.92532403, -1.10161918,  0.31785359],
       [ 0.1386126 , -1.19621091, -0.19300219, -0.38810572],
       [ 0.8236792 ,  0.13508551,  0.34315057,  2.81063427]])

In [5]:
# Create the demo data: 100 rows x 4 columns of standard-normal random numbers,
# with the columns labelled A, B, C and D.
# A dedicated, seeded generator makes the frame identical on every
# "Restart Kernel -> Run All" without touching NumPy's global random state.
SEED = 42
rng = np.random.default_rng(SEED)
df = pd.DataFrame(rng.standard_normal((100, 4)),
                  columns=list('ABCD'))

print(df.shape)
df.head()

(100, 4)


Unnamed: 0,A,B,C,D
0,-0.49776,-0.812894,0.319636,0.809129
1,0.117874,-0.356806,0.415075,-1.511179
2,-2.941449,1.275813,-0.398161,0.889545
3,1.502261,0.188442,-0.445753,-0.319527
4,1.299319,-0.505759,-1.413864,-1.180813


In [8]:
# Small categorical frame used by the bar-plot example: one row per item,
# with the item name in 'items' and its number in 'Values'.
item_names = ['bag', 'apple', 'cap']
item_values = [32, 43, 50]
df2 = pd.DataFrame(list(zip(item_names, item_values)),
                   columns=['items', 'Values'])
df2

Unnamed: 0,items,Values
0,bag,32
1,apple,43
2,cap,50


### Line Plot

In [9]:
# Line plot of the whole DataFrame: iplot() is called with no arguments.
df.iplot()

In [19]:
# Line plot of column A only.
df['A'].iplot()

In [20]:
# Line plot of a label-based column slice: all rows, columns 'A' through 'B'.
df.loc[:,'A':'B'].iplot()

In [21]:
# Line plot of a position-based column slice: all rows, first three columns.
df.iloc[:, :3].iplot()

### Scatter Plot

In [11]:
# Scatter plot of column B against column A.
# mode='markers' is required here: without a mode setting the data is drawn as lines.
df.iplot(
    kind='scatter',
    x='A',
    y='B',
    mode='markers',
    size=20,
)

### Bar Plot

In [12]:
# Bar chart of df2: item names on the x axis, their 'Values' on the y axis.
df2.iplot(x='items', y='Values', kind='bar')

In [18]:
# Second bar-chart example: one score per hobby.
hobbies = ['movie', 'tv', 'music', 'sleep']
scores = [80, 70, 88, 97]
df3 = pd.DataFrame({'hobby': hobbies, 'score': scores})
df3.iplot(x='hobby', y='score', kind='bar')

In [22]:
# Bar chart of column A only.
df['A'].iplot(kind='bar')

### Stacked / Grouped Bar Plot

In [23]:
# Bar chart of all columns with barmode='stack' (stacked bars).
df.iplot(kind='bar', barmode='stack')

In [24]:
# Bar chart of columns A and B with barmode='group' (grouped bars).
df[['A','B']].iplot(kind='bar', barmode='group')

In [25]:
# Same stacked layout using kind='barh' (presumably horizontal bars — confirm
# against the cufflinks version in use).
df.iplot(kind='barh', barmode='stack')

### Box Plot

In [26]:
# Box plot of every column of df.
df.iplot(kind='box')

In [27]:
# Box plot restricted to columns A and C.
df[['A','C']].iplot(kind='box')

### 3D Surface Plot

In [28]:
# Data for the surface example, written as (x, y, z) rows.
# Note: this rebinds df3, replacing the hobby/score frame from the bar-plot section.
df3 = pd.DataFrame(
    [(1, 10, 5), (2, 20, 4), (3, 30, 3), (4, 40, 2), (5, 60, 1)],
    columns=['x', 'y', 'z'],
)
df3

Unnamed: 0,x,y,z
0,1,10,5
1,2,20,4
2,3,30,3
3,4,40,2
4,5,60,1


In [29]:
# Surface plot of df3 using the 'rdylbu' colorscale.
df3.iplot(kind='surface',colorscale='rdylbu')

### Line Charts

In [33]:
# Print the docstring of cf.datagen.lines to see which parameters it accepts.
print(cf.datagen.lines.__doc__)


	Returns a DataFrame with the required format for 
	a scatter (lines) plot

	Parameters:
	-----------
		n_traces : int
			Number of traces 
		n : int
			Number of points for each trace
		columns : [str]
			List of column names
		dateIndex : bool
			If True it will return a datetime index
			if False it will return a enumerated index
		mode : string
			Format for each item
				'abc' for alphabet columns
				'stocks' for random stock names
	


In [30]:
# Generate sample line-plot data with cufflinks' datagen (default arguments)
# and preview the first rows.
# Note: this rebinds df, replacing the A-D random frame used in earlier sections.
df = cf.datagen.lines()
df.head()

Unnamed: 0,YQY.UT,HRU.AU,KQC.RM,LGQ.QW,AGS.PJ
2015-01-01,0.073972,-0.175159,-0.812826,0.353491,0.012374
2015-01-02,2.501159,-0.349398,-0.453956,-0.255014,0.507322
2015-01-03,1.978699,0.409341,0.428836,-1.054072,-0.667538
2015-01-04,2.032286,1.51817,-0.511259,-2.155382,-1.729354
2015-01-05,0.685088,2.724312,-0.553106,-1.978452,-2.817019


In [31]:
# Plot the generated data with kind='line'.
df.iplot(kind='line')

### Plot Styling

#### 테마(Theme) 설정

In [34]:
# Fetch the theme names cufflinks offers and display them.
themes = cf.getThemes()
themes

['ggplot', 'pearl', 'solar', 'space', 'white', 'polar', 'henanigans']

In [35]:
# Series holding the integers 0 through 9.
data = pd.Series(range(10))

# Draw the same bar chart once per available theme, using the theme name as the title.
for theme_name in themes:
    data.iplot(kind='bar', theme=theme_name, title=theme_name)

In [36]:
# Set 'pearl' as the theme through cufflinks' set_config_file.
cf.set_config_file(theme='pearl')

### Plotly express 사용한 시각화

* cufflinks보다 좀 더 다양하며, 사용방법은 seaborn과 비슷함.
* plotly_express 이용. plotly 4.1 부터는 별도 설치 없어도 됨. 3.8.1의 경우 설치 필요

In [37]:
import plotly.express as px

In [41]:
# Load the iris dataset shipped with plotly express and display it.
iris = px.data.iris()
iris

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id
0,5.1,3.5,1.4,0.2,setosa,1
1,4.9,3.0,1.4,0.2,setosa,1
2,4.7,3.2,1.3,0.2,setosa,1
3,4.6,3.1,1.5,0.2,setosa,1
4,5.0,3.6,1.4,0.2,setosa,1
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica,3
146,6.3,2.5,5.0,1.9,virginica,3
147,6.5,3.0,5.2,2.0,virginica,3
148,6.2,3.4,5.4,2.3,virginica,3


In [50]:
# Scatter plot: sepal_width on the x axis, sepal_length on the y axis.
fig = px.scatter(iris, x='sepal_width', y='sepal_length')
fig.show()

In [51]:
# Add extra information (species): colour each point by the species it belongs to.
fig = px.scatter(iris, x='sepal_width', y='sepal_length', color='species')
fig.show()

In [None]:
# May raise an error depending on the installed versions.
# NOTE(review): trendline="ols" relies on the optional statsmodels package in
# plotly express — probably the failure meant above; confirm in your environment.
# This cell has no execution count (In [None]), i.e. it was not run in the saved notebook.

import plotly.express as px
# Rebinds df to the iris dataset.
df = px.data.iris()
# Scatter coloured by species, with a box plot on the x margin, a violin plot
# on the y margin, and an "ols" trendline.
fig = px.scatter(df, 
           x="sepal_width", y="sepal_length", 
           color="species", marginal_y="violin",
           marginal_x="box", trendline="ols")
fig.show()

### Pair Plot

In [53]:
import plotly.express as px

# Pair plot (scatter matrix) of the four iris measurements, coloured by species.
# Rebinds df to the iris dataset.
df = px.data.iris()
iris_dimensions = [
    "sepal_width",
    "sepal_length",
    "petal_width",
    "petal_length",
]
fig = px.scatter_matrix(df, dimensions=iris_dimensions, color="species")
fig.show()

In [54]:
import plotly.express as px

# Parallel-categories diagram of the tips dataset, coloured by the "size"
# column with the Inferno sequential colour scale.
# Rebinds df to the tips dataset.
df = px.data.tips()
fig = px.parallel_categories(
    df,
    color="size",
    color_continuous_scale=px.colors.sequential.Inferno,
)
fig.show()

In [55]:
# Load the gapminder dataset (rebinding df) and print, in order: its shape,
# its column index, and the docstring of the loader function.
df = px.data.gapminder()
for _summary in (df.shape, df.columns, px.data.gapminder.__doc__):
    print(_summary)

(1704, 8)
Index(['country', 'continent', 'year', 'lifeExp', 'pop', 'gdpPercap',
       'iso_alpha', 'iso_num'],
      dtype='object')

    Each row represents a country on a given year.

    https://www.gapminder.org/data/

    Returns:
        A `pandas.DataFrame` with 1704 rows and the following columns: `['country', 'continent', 'year', 'lifeExp', 'pop', 'gdpPercap',
       'iso_alpha', 'iso_num']`.
    
