# Converting matplotlib / seaborn plots to Bokeh

##### tmp = visualize(data) 를 먼저 선언합니다. (data는 시각화할 DataFrame입니다.)
##### 이후 tmp.'메소드 명'으로 시각화를 시킵니다.
##### bokeh의 default는 html을 띄워주는 것이나, jupyter notebook 사용자에게 맞게 jupyter 내에서 볼 수 있게 해두었습니다.
##### html로 띄우고 싶은 분들은 initialize() 메소드의 output_notebook() 부분을 지우시면 됩니다.
##### 또한, html로 다운을 원하신다면, method의 끝에 'output_file("file 명")'을 추가하시면 됩니다.
##### 물론, jupyter 내의 이미지를 다운받는 기능도 있습니다.

In [None]:
class visualize():
    """Convenience wrapper that draws common statistical plots with Bokeh.

    The instance keeps a tabular dataset (``self.data``, indexed by column
    name) and a blank Bokeh figure (``self.p``).  Most plotting methods draw
    onto that figure, show it, and then call :meth:`initialize` so the next
    call starts from an empty figure again.

    Parameters
    ----------
    data : tabular object indexed by column name
        The methods use ``data[col]``, ``.unique()``, ``.min()`` and
        ``.max()`` -- presumably a pandas ``DataFrame``.
    width, height : int
        Size handed to every figure that is created.
    """

    # Toolbar definition shared by every figure created by this class.
    TOOLS = "hover,crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,undo,redo,reset,tap,save,box_select"

    def __init__(self, data, width=600, height=500):
        self.data = data
        self.width = width
        self.height = height
        self.initialize()

    def initialize(self):
        """Create a fresh, empty figure and route Bokeh output to the notebook."""
        from bokeh.io import output_notebook
        from bokeh.plotting import figure

        self.tools = self.TOOLS
        self.p = figure(width=self.width, height=self.height, title=None, tools=self.tools)

        # Remove this call to fall back to Bokeh's default (standalone HTML).
        output_notebook()

    def _show_and_reset(self, obj):
        """Show *obj*, then always replace ``self.p`` with a fresh figure."""
        from bokeh.io import show

        try:
            show(obj)
        finally:
            # Reset even when show() fails so that half-drawn glyphs never
            # leak into the next plot.
            self.initialize()

    def make_color(self, category):
        """Return one random ``#RRGGBB`` colour string per element of *category*."""
        import random

        hex_digits = '0123456789ABCDEF'
        return ["#" + ''.join(random.choice(hex_digits) for _ in range(6)) for _ in category]

    def _category_palette(self, factors):
        """Return a palette with at least ``len(factors)`` colours."""
        from bokeh.palettes import Category10

        count = len(factors)
        if count <= 10:
            # Category10 is only defined for sizes 3..10.
            return Category10[max(count, 3)]
        return self.make_color(factors)

    @staticmethod
    def _density_curves(x, n_bins):
        """Compute the histogram and the fitted normal curve for *x*.

        Returns ``(hist, edges, grid, pdf)``.  ``hist``/``edges`` come from a
        density-normalised histogram with exactly ``n_bins`` bars; ``grid`` has
        ``3 * n_bins`` points spanning the data range; ``pdf`` is the normal
        density with the sample mean and standard deviation evaluated on
        ``grid``, or ``None`` when the spread is zero or undefined.
        """
        import numpy as np

        values = np.asarray(x, dtype=float)
        hist, edges = np.histogram(values, bins=n_bins, density=True)
        grid = np.linspace(values.min(), values.max(), n_bins * 3)

        mu = values.mean()
        sigma = values.std(ddof=1) if values.size > 1 else 0.0
        if not np.isfinite(sigma) or sigma == 0:
            return hist, edges, grid, None

        z = (grid - mu) / sigma
        pdf = np.exp(-0.5 * z ** 2) / (sigma * np.sqrt(2.0 * np.pi))
        return hist, edges, grid, pdf

    def Density(self, x, bin=50, line_color='white', fill_color='skyblue'):
        """Draw a density histogram of one column with a normal curve on top.

        x : name of the column plotted on the x axis
        bin : number of bars in the histogram
        line_color : colour of the bar outlines
        fill_color : colour of the bar interiors
        """
        p = self.p
        hist, edges, grid, pdf = self._density_curves(self.data[x], bin)

        p.quad(top=hist,
               bottom=0,
               left=edges[:-1],
               right=edges[1:],
               fill_color=fill_color,
               line_color=line_color)

        # The curve is skipped when no normal distribution can be fitted
        # (for example a constant column).
        if pdf is not None:
            p.line(grid,
                   pdf,
                   line_width=2,
                   line_color="navy",
                   legend_label="Probability Density Function")

        self._show_and_reset(p)

    def kde2D(self, x, y, N=300):
        """Draw filled contours of a 2-D Gaussian kernel density estimate.

        x, y : names of the two columns
        N : number of grid points per axis used to evaluate the estimate
        """
        from bokeh.palettes import Blues9
        import numpy as np
        from scipy.stats import gaussian_kde

        data = self.data
        x = data[x]
        y = data[y]
        p = self.p

        xmin, xmax = x.min(), x.max()
        ymin, ymax = y.min(), y.max()

        # A complex step makes mgrid produce N points including both ends.
        X, Y = np.mgrid[xmin:xmax:N*1j, ymin:ymax:N*1j]
        positions = np.vstack([X.ravel(), Y.ravel()])
        values = np.vstack([x, y])
        kernel = gaussian_kde(values)
        Z = np.reshape(kernel(positions).T, X.shape)

        p.background_fill_color = "#fafafa"
        p.grid.level = "overlay"
        p.grid.grid_line_color = "black"
        p.grid.grid_line_alpha = 0.05

        palette = Blues9[::-1]
        levels = np.linspace(np.min(Z), np.max(Z), 10)
        # The lowest level is dropped so the background stays unfilled.
        p.contour(X, Y, Z, levels[1:], fill_color=palette, line_color=palette)

        self._show_and_reset(p)

    def bubble(self, x, y, category, size):
        """Draw a bubble chart coloured by *category* with a category selector.

        x, y : names of the columns used for the marker positions
        category : name of the column that determines the colour
        size : marker size, passed straight to the glyph
        """
        from bokeh.models import CategoricalColorMapper, ColumnDataSource, Select, CustomJS
        from bokeh.layouts import row

        data = self.data
        p = self.p

        factors = sorted(data[category].unique())
        source = ColumnDataSource(data)
        color_mapper = CategoricalColorMapper(factors=factors, palette=self.make_color(factors))

        p.scatter(x=x,
                  y=y,
                  size=size,
                  color={'field': category, 'transform': color_mapper},
                  alpha=0.7, source=source)

        category_select = Select(title='Category:', value='All', options=['All', *factors])

        # The column name is handed over through ``args`` rather than being
        # interpolated into the JavaScript source, so names containing quotes
        # cannot break the callback.
        code = '''
            var values = source.data[column];
            var category = cb_obj.value;
            var indices = [];
            for (var i = 0; i < values.length; i++) {
                if (category == 'All' || values[i] == category) {
                    indices.push(i);
                }
            }
            source.selected.indices = indices;
        '''

        callback = CustomJS(args=dict(source=source, column=category), code=code)
        category_select.js_on_change('value', callback)

        self._show_and_reset(row(category_select, p))

    def connected_scatter(self, x, y):
        """Draw the points of two columns joined by a line, in row order."""
        from bokeh.plotting import curdoc

        p = self.p
        data = self.data
        x = data[x]
        y = data[y]

        line_color = '#1B7837'

        # NOTE: this changes the theme of the current document, not only of
        # this figure.
        curdoc().theme = 'caliber'
        p.line(x, y, color=line_color)
        p.scatter(x, y, line_color=line_color, fill_color="white")

        self._show_and_reset(p)

    def pairplot(self, category, target: tuple):
        """Draw a scatter-plot matrix of the *target* columns.

        category : name of the column that determines the marker colour
        target : names of the columns to plot against each other
        """
        from bokeh.models import (ColumnDataSource, DataRange1d, Plot, LinearAxis,
                                  BasicTicker, Grid, Scatter, PanTool, WheelZoomTool,
                                  ResetTool, LassoSelectTool, HoverTool)
        from bokeh.transform import factor_cmap
        from bokeh.io import output_notebook, reset_output
        from bokeh.layouts import gridplot
        from itertools import product

        data = self.data

        factors = sorted(data[category].unique())
        palette = self._category_palette(factors)
        attrs = target
        N = len(attrs)

        source = ColumnDataSource(data=data)

        # One shared range per grid column / row keeps panning and zooming
        # linked across the matrix.
        xdrs = [DataRange1d(bounds=None) for _ in range(N)]
        ydrs = [DataRange1d(bounds=None) for _ in range(N)]

        plots = []

        reset_output()
        output_notebook()
        for i, (y, x) in enumerate(product(attrs, reversed(attrs))):
            p = Plot(x_range=xdrs[i % N], y_range=ydrs[i // N],
                     background_fill_color="#fafafa",
                     border_fill_color="white", width=200, height=200, min_border=5)

            if i % N == 0:  # first column
                p.min_border_left = p.min_border + 4
                p.width += 40
                yaxis = LinearAxis(axis_label=y)
                yaxis.major_label_orientation = "vertical"
                p.add_layout(yaxis, "left")
                yticker = yaxis.ticker
            else:
                yticker = BasicTicker()
            p.add_layout(Grid(dimension=1, ticker=yticker))

            if i >= N * (N - 1):  # last row
                p.min_border_bottom = p.min_border + 40
                p.height += 40
                xaxis = LinearAxis(axis_label=x)
                p.add_layout(xaxis, "below")
                xticker = xaxis.ticker
            else:
                xticker = BasicTicker()
            p.add_layout(Grid(dimension=0, ticker=xticker))

            marker = Scatter(x=x, y=y, fill_alpha=0.6, size=5, line_color=None,
                             fill_color=factor_cmap(category, palette, factors))
            r = p.add_glyph(source, marker)
            p.x_range.renderers.append(r)
            p.y_range.renderers.append(r)

            # suppress the diagonal
            if (i % N) + (i // N) == N - 1:
                r.visible = False
                p.grid.grid_line_color = None

            p.add_tools(PanTool(), WheelZoomTool(), ResetTool(), LassoSelectTool(), HoverTool())
            plots.append(p)

        self._show_and_reset(gridplot(plots, ncols=N))

    def heatmap(self, x, y, target):
        """Draw a categorical heatmap coloured by the *target* column.

        x, y : names of the columns whose unique values form the two axes
        target : name of the column mapped onto the colour scale

        The colour bar labels are formatted as integer percentages.
        """
        from math import pi
        from bokeh.models import BasicTicker, PrintfTickFormatter
        from bokeh.plotting import figure
        from bokeh.transform import linear_cmap

        data = self.data
        x_range = list(data[x].unique())
        y_range = list(data[y].unique())

        colors = ["#75968f", "#a5bab7", "#c9d9d3", "#e2e2e2", "#dfccce", "#ddb7b1", "#cc7878", "#933b41", "#550b1d"]

        # This plot needs categorical ranges, so it builds its own figure
        # instead of drawing on self.p.
        p = figure(x_range=x_range, y_range=y_range,
                   x_axis_location="above", width=self.width, height=self.height,
                   tools=self.tools, toolbar_location='below')

        p.grid.grid_line_color = None
        p.axis.axis_line_color = None
        p.axis.major_tick_line_color = None
        p.axis.major_label_text_font_size = "7px"
        p.axis.major_label_standoff = 0
        p.xaxis.major_label_orientation = pi / 3

        r = p.rect(x=x, y=y, width=1, height=1, source=data,
                   fill_color=linear_cmap(target, colors,
                                          low=data[target].min(),
                                          high=data[target].max()),
                   line_color=None)

        p.add_layout(r.construct_color_bar(
            major_label_text_font_size="7px",
            ticker=BasicTicker(desired_num_ticks=len(colors)),
            formatter=PrintfTickFormatter(format="%d%%"),
            label_standoff=6,
            border_line_color=None,
            padding=5,
        ), 'right')

        self._show_and_reset(p)

    def scatter(self, x, y, radii=None, color=False):
        """Draw a scatter plot of two columns.

        x, y : names of the columns used for the marker positions
        radii : marker radius in data units; ``None`` uses default-sized markers
        color : when truthy, colour each point from its coordinates
                (red = 50 + 2*x, green = 30 + 2*y, blue = 150); otherwise black
        """
        import numpy as np

        data = self.data
        x = data[x]
        y = data[y]

        if color:
            # Clip to the uint8 range so large coordinates saturate instead of
            # wrapping around to unrelated colours.
            red = np.clip(50 + 2 * np.asarray(x, dtype=float), 0, 255)
            green = np.clip(30 + 2 * np.asarray(y, dtype=float), 0, 255)
            blue = np.full(len(red), 150.0)
            colors = np.column_stack([red, green, blue]).astype("uint8")
        else:
            colors = 'black'

        p = self.p
        style = dict(fill_color=colors, fill_alpha=0.6, line_color=None)
        if radii is None:
            p.scatter(x, y, **style)
        else:
            # Radius-based circles are drawn by the circle glyph.
            p.circle(x, y, radius=radii, **style)

        self._show_and_reset(p)
        

In [None]:
# Load the customer table from the working directory; the CustomerID column
# is used as the DataFrame index.
import pandas as pd
data = pd.read_csv('./Customers.csv', index_col='CustomerID')

In [None]:
# Remove every row that contains a missing value (modifies `data` in place).
data.dropna(inplace=True)
# Renumber the rows from 0; drop=True discards the previous index instead of
# keeping it as a column.
data.reset_index(drop=True, inplace=True)

In [None]:
# Bare expression at the end of a notebook cell: the notebook renders the
# cleaned DataFrame as the cell output.
data