In [1]:
import numpy as np
import pandas as pd
import dash
import dash_cytoscape as cyto
from dash.dependencies import Input, Output
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objects as go
import networkx as nx
import json
from jupyter_dash import JupyterDash
import csv

In [2]:
from sklearn.preprocessing import MinMaxScaler
# Scaler created with default arguments. A later cell fits it on the sampled
# edge weights and overwrites the 'weight' column with the rescaled values.
scaler = MinMaxScaler()

In [4]:
# Load the edge list stored in product_6015_elist.json into a DataFrame.
# Later cells read its 'source', 'target' and 'weight' columns.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default encoding.
with open('product_6015_elist.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
eglist = pd.DataFrame(data)
eglist.sample(5)  # preview five random rows

Unnamed: 0,source,target,weight
3688,보습,느낌,0.8423901463254266
1297,끈적거리,샘플,3.4672439980068237
2310,레드,제품,0.5758468821314835
4337,사용하,제품,0.3629248848888117
281,같이,사서,2.203582838902685


In [None]:
# Number of edges drawn at random from the full edge list.
edge_to_sample = 2_000

In [None]:
# Draw a random subset of edges. The sample size is capped at the number of
# available rows so a short edge list does not raise, and the seed is fixed so
# that a re-run from a fresh kernel selects the same edges.
sample_seed = 42
n_edges = min(edge_to_sample, len(eglist))
eglist = eglist.sample(n=n_edges, random_state=sample_seed).reset_index(drop=True)

# Rescale the weights with the shared scaler. fit_transform takes a 2-D input
# and returns a 2-D array, which is flattened before being stored as a column.
scaled_weights = scaler.fit_transform(eglist['weight'].to_numpy().reshape(-1, 1))
eglist['weight'] = scaled_weights.ravel()
eglist.info()

In [None]:
# Preview ten random rows of the sampled, rescaled edge list.
eglist.sample(10)

In [28]:
# Stack the 'source' and 'target' columns on top of each other into a single
# column named 'nodes', so every row is one endpoint occurrence of an edge.
nodes_list = (
    pd.concat([eglist[col] for col in ('source', 'target')], ignore_index=True)
    .to_frame(name='nodes')
)

In [29]:
# Report how many distinct labels appear among the edge endpoints.
print(f"nodes num: {np.unique(nodes_list['nodes'].to_numpy()).size}")

nodes num: 194


In [30]:
# Divisor applied to every node's occurrence count when the degree dictionary
# is built; with 1 the counts keep their magnitude.
cutoff_div = 1

In [31]:
# Degree of a node = number of times it appears as an edge endpoint, divided
# by cutoff_div (true division, so the stored values are floats).
#
# The counts are converted straight to a dict rather than through a one-column
# DataFrame: the name of that column depends on the pandas version ('nodes'
# before 2.0, 'count' from 2.0 on), so looking it up as 'nodes' raises
# KeyError on current pandas.
node_counts = nodes_list['nodes'].value_counts().to_dict()
degree_dict = {node: count / cutoff_div for node, count in node_counts.items()}

In [32]:
# Show the smallest and the largest node degree.
print(f'node degree range: {min(degree_dict.values())} - {max(degree_dict.values())}')

node degree range: 2.0 - 48.0


In [33]:
# Build the Cytoscape element lists from the edge list.
# An edge is kept only when both of its endpoints have a degree strictly above
# min_degree; a node is emitted the first time one of its kept edges is seen.
min_degree = 15
colors = ['gray', 'skyblue', 'lightgreen', 'orange', 'pink']

nodes = set()  # ids of the nodes already emitted
cy_edges, cy_nodes = [], []

# Select the columns by name so the loop does not depend on column order.
edges = eglist[['source', 'target', 'weight']].values
for source, target, weight in edges:
    # The colour class cycles with the number of nodes emitted so far; it is
    # shared by the edge and by every node this edge introduces.
    color = colors[len(cy_nodes) % len(colors)]

    if degree_dict[source] <= min_degree or degree_dict[target] <= min_degree:
        continue

    for node in (source, target):
        if node not in nodes:
            nodes.add(node)
            cy_nodes.append({"data": {"id": node, "degree": degree_dict[node]}, "classes": color})

    # Weights are rounded to two decimals and cast to a plain float.
    cy_edges.append({'data': {'source': source, 'target': target, 'weights': float(np.round(weight, 2))}, 'classes': color})
print('The number of nodes to show in graph: {}'.format(len(nodes)))

The number of nodes to show in graph: 131


In [34]:
# Peek at the first three node elements.
cy_nodes[:3]

[{'data': {'id': '글로', 'degree': 36.0}, 'classes': 'gray'},
 {'data': {'id': '진짜', 'degree': 47.0}, 'classes': 'gray'},
 {'data': {'id': '보이', 'degree': 33.0}, 'classes': 'lightgreen'}]

In [35]:
# Peek at the first three edge elements.
cy_edges[:3]

[{'data': {'source': '글로', 'target': '진짜', 'weights': 0.03},
  'classes': 'gray'},
 {'data': {'source': '보이', 'target': '누드', 'weights': 0.09},
  'classes': 'lightgreen'},
 {'data': {'source': '마스크', 'target': '포인트', 'weights': 0.35},
  'classes': 'pink'}]

In [None]:
# Create the Dash application object; the layout and the callbacks defined
# further down in this cell are attached to it.
app = dash.Dash(__name__)
# app = JupyterDash(__name__)

# Cytoscape stylesheet, assembled from three parts: the node rule, the edge
# rule, and one class rule per colour name.

# Nodes show their id as label and take height/width from their 'degree'
# data field (a fixed size of 10 was the earlier, disabled alternative).
node_rule = {
    'selector': 'node',
    'style': {
        'content': 'data(id)',
        'opacity': 0.8,
        'text-opacity': 1,
        'height': 'data(degree)',
        'width': 'data(degree)',
        'font-size': '5px',
    },
}

# Edge opacity is read from the edge's 'weights' data field.
edge_rule = {
    'selector': 'edge',
    'style': {'opacity': 'data(weights)'},
}

# One '.<colour>' class rule per entry of `colors`, painting both the edge
# line and the node background in that colour.
color_rules = [
    {
        'selector': f'.{name}',
        'style': {'line-color': name, 'background-color': name},
    }
    for name in colors
]

default_stylesheet = [node_rule, edge_rule] + color_rules

# Page layout, top to bottom: heading, layout-algorithm picker, read-out for
# the hovered edge, and the network graph itself.
# NOTE(review): the picker starts on 'random' while the graph's initial layout
# is 'concentric'. Dash normally fires callbacks once on page load, so the
# picker value presumably wins - confirm which start-up layout is intended.
layout_names = ['random', 'grid', 'circle', 'cose', 'concentric', 'breadthfirst']

page_title = html.H2('글로우픽 리뷰 키워드 네트워크 분석', style={'textAlign': 'center'})

layout_picker = dcc.Dropdown(
    id='dropdown-update-layout',
    options=[{'label': name.capitalize(), 'value': name} for name in layout_names],
    value='random',
    clearable=False,
    style={'width': '30%'},
)

edge_readout = html.H3(id='cytoscape-mouseoverEdgeData-output')

network_view = cyto.Cytoscape(
    id='cytoscape',
    elements=cy_edges + cy_nodes,
    stylesheet=default_stylesheet,
    layout={'name': 'concentric'},
    style={'height': '100vh', 'width': '60%'},
)

app.layout = html.Div([page_title, layout_picker, edge_readout, network_view])

@app.callback(
    Output('cytoscape', 'layout'),
    Input('dropdown-update-layout', 'value'),
)
def update_layout(layout):
    """Build the Cytoscape layout settings for the selected layout name.

    Args:
        layout: current value of the 'dropdown-update-layout' dropdown.

    Returns:
        dict holding the chosen layout name with animation switched on.
    """
    return dict(name=layout, animate=True)

@app.callback(
    Output('cytoscape-mouseoverEdgeData-output', 'children'),
    Input('cytoscape', 'mouseoverEdgeData'),
)
def displayTapEdgeData(data):
    """Format the weight of the edge currently under the mouse pointer.

    Args:
        data: the graph's 'mouseoverEdgeData' value; may be empty or None.

    Returns:
        A text line with the edge's 'weights' value, or None when there is
        no edge data.
    """
    if not data:
        return None
    return f"PMI value(normalized): {data['weights']}"

# Start the Dash server with debug mode on and the auto-reloader turned off.
app.run_server(debug=True, use_reloader=False)
# app.run_server(port=8000, mode='inline')

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on


#### Convert the CSV file to a JSON file (one-off preprocessing; the cells below are commented out)

In [19]:
# def csv_to_json(csvFilePath):
#     jsonArray = []
      
#     #read csv file
#     with open(csvFilePath, encoding='utf-8') as csvf: 
#         #load csv file data using csv library's dictionary reader
#         csvReader = csv.DictReader(csvf) 

#         #convert each csv row into python dict
#         for row in csvReader: 
#             #add this python dict to json array
#             jsonArray.append(row)
        
#     return jsonArray

In [17]:
# json_data = csv_to_json('product_6015_elist.csv')

In [20]:
# with open('product_6015_elist.json', 'w', encoding='utf-8') as jsonf: 
#     jsonString = json.dumps(json_data, indent=4)
#     jsonf.write(jsonString)