# Library(matplotlib or seaborn) convert to bokeh

##### tmp = visualize() 를 먼저 선언합니다.
##### 이후 tmp.'메소드 명'으로 시각화를 시킵니다.
##### bokeh의 default는 html을 띄워주는 것이나, jupyter notebook 사용자에게 맞게 jupyter 내에서 볼 수 있게 해두었습니다.
##### html로 띄우고 싶은 분들은 __init__ 부분의 output_notebook() 부분을 지우시면 됩니다.
##### 또한, html로 다운을 원하신다면, mothod의 끝에 'output_file("file 명")'을 추가하시면 됩니다.
##### 물론, jupyter 내의 이미지를 다운받는 기능도 있다.

In [37]:
class visualize():
    def __init__(self, data, width, height):
        from bokeh.io import output_notebook 
        from bokeh.plotting import figure
        self.data = data
        self.width = width
        self.height = height
        self.tools = "hover,crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,undo,redo,reset,tap,save,box_select"
        self.p = figure(width=self.width, height=self.height, title = None, tools=self.tools)
        
        output_notebook()

    def Density(self, x, bin=50, line_color='white', fill_color='skyblue'):
        """
        x : x축에 들어갈 변수
        bin : 히스토그램을 몇 개의 막대로 나타낼것인지
        line_color : 히스토그램의 모서리 색상
        fill_color : 히스토그램의 안쪽 색상
        """
        data = self.data

        from bokeh.io import show
        import numpy as np

        x = data[x]
        p = self.p

        bins = np.linspace(min(x),max(x),bin)
        hist, edges = np.histogram(x, density=True, bins=bins)
        p.quad(top=hist,
               bottom=0,
               left=edges[:-1],
               right=edges[1:],
               fill_color=fill_color,
               line_color=line_color,)
        
        d = np.linspace(min(x),max(x),bin*3)
        pdf = np.exp(-0.5*d**2) / np.sqrt(2.0*np.pi)
        p.line(d, 
               pdf, 
               line_width=2, 
               line_color="navy",
               legend_label="Probability Density Function")
        show(p)

    def kde2D(self, x, y, N=300):
        
        from bokeh.io import show
        from bokeh.palettes import Blues9
        import numpy as np
        from scipy.stats import gaussian_kde
        
        data = self.data
        x = data[x]
        y = data[y]
        N = N
        p = self.p
        
        xmin, xmax = x.min(), x.max()
        ymin, ymax = y.min(), y.max()

        X, Y = np.mgrid[xmin:xmax:N*1j, ymin:ymax:N*1j]
        positions = np.vstack([X.ravel(), Y.ravel()])
        values = np.vstack([x, y])
        kernel = gaussian_kde(values)
        Z = np.reshape(kernel(positions).T, X.shape)


        p.background_fill_color = "#fafafa"
        p.grid.level = "overlay"
        p.grid.grid_line_color = "black"
        p.grid.grid_line_alpha = 0.05

        palette = Blues9[::-1]
        levels = np.linspace(np.min(Z), np.max(Z), 10)
        p.contour(X, Y, Z, levels[1:], fill_color=palette, line_color=palette)

        show(p)


    def bubble(self, x, y, category, size):
        
        from bokeh.io import show
        from bokeh.models import CategoricalColorMapper
        import random

        data = self.data
        x = data[x]
        y = data[y]
        p = self.p

        factors = data[category].unique()

        COLOR_SCALE = ["#"+''.join([random.choice('0123456789ABCDEF') for j in range(6)]) for i in range(len(factors))]

        color_mapper = CategoricalColorMapper(factors=factors, palette = COLOR_SCALE)
        
        p.circle(x=x, 
                 y=y, 
                 size=size, 
                 color={'field': category, 'transform': color_mapper}, 
                 alpha=0.7, source=data)
        
        show(p)


In [29]:
import pandas as pd
data = pd.read_csv('./Customers.csv')

In [36]:
import random
["#"+''.join([random.choice('0123456789ABCDEF') for j in range(6)]) for i in range(len(data['Profession'].unique()))]

['#08DF06',
 '#F8A796',
 '#4A1AFB',
 '#E14000',
 '#FA9D90',
 '#9E29DC',
 '#E74988',
 '#6CA972',
 '#67C5E9',
 '#37FFBA']

In [38]:
a = visualize(data, 760,400)

In [39]:
a.bubble('Annual Income ($)', 'Spending Score (1-100)', 'Profession', 'Work Experience')

ValueError: failed to validate CategoricalColorMapper(id='p1404', ...).factors: expected an element of either Seq(String), Seq(Tuple(String, String)) or Seq(Tuple(String, String, String)), got array(['Healthcare', 'Engineer', 'Lawyer', 'Entertainment', 'Artist',
       'Executive', 'Doctor', 'Homemaker', 'Marketing', nan], dtype=object)

In [19]:
a = visualize(data, 760, 400)

In [20]:
a.Density('x')

In [80]:
class visualize():
    def __init__(self):
        from bokeh.io import output_notebook, show
        from bokeh.plotting import figure
        
        self.size = (760,400)
        output_notebook()

    def Density(dataset, x, bin=50, line_color='white', fill_color='skyblue', size=size):
        """
        dataset : pd.DataFrame
        x : x축에 들어갈 변수
        bin : 히스토그램을 몇 개의 막대로 나타낼것인지
        line_color : 히스토그램의 모서리 색상
        fill_color : 히스토그램의 안쪽 색상
        size : 그래프의 사이즈
        """
        
        import numpy as np
        width = size[0]
        height = size[1]
        x = dataset[x]
        p = figure(width=width, 
                   height=height,
                   title = None)
        bins = np.linspace(min(x),max(x),bin)
        hist, edges = np.histogram(x, density=True, bins=bins)
        p.quad(top=hist,
               bottom=0,
               left=edges[:-1],
               right=edges[1:],
               fill_color=fill_color,
               line_color=line_color,)
        
        d = np.linspace(min(x),max(x),bin*3)
        pdf = np.exp(-0.5*d**2) / np.sqrt(2.0*np.pi)
        p.line(d, 
               pdf, 
               line_width=2, 
               line_color="navy",
               legend_label="Probability Density Function")
        show(p)



NameError: name 'size' is not defined

In [5]:
import pandas as pd
import numpy as np

rng = np.random.default_rng()
x = rng.normal(loc=0, scale=1, size=1000)
y = rng.normal(loc=0, scale=3, size=1000)
z = rng.normal(loc=0, scale=2, size=1000)
data = pd.DataFrame(data=[x,y,z],
                    index=['x','y','z']).T


In [6]:
visualize.Density(data, 'z')

AttributeError: 'DataFrame' object has no attribute 'data'

In [None]:
from bokeh.layouts import gridplot
from bokeh.plotting import figure
from bokeh.io import show, output_notebook

In [None]:
import numpy as np

rng = np.random.default_rng()
x = rng.normal(loc=0, scale=1, size=1000)

p = figure(width=670, height=400, toolbar_location=None,
           title="Normal (Gaussian) Distribution")

# Histogram
bins = np.linspace(-3, 3, 40)
# bins = np.linspace(-50,50,300)
hist, edges = np.histogram(x, density=True, bins=bins)
p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
         fill_color="skyblue", line_color="white",
         legend_label="1000 random samples")

# Probability density function
x = np.linspace(-3.0, 3.0, 100)
pdf = np.exp(-0.5*x**2) / np.sqrt(2.0*np.pi)
p.line(x, pdf, line_width=2, line_color="navy",
       legend_label="Probability Density Function")

p.y_range.start = 0
p.xaxis.axis_label = "x"
p.yaxis.axis_label = "PDF(x)"

show(p)