# Chart Recommender
This file provides an example of running the chart recommender, including the single chart recommender and the multiple-charts recommender.     
The input is a data table given as a `pandas.DataFrame`.  

### Single-Chart Recommender
The output is the recommended charts, described by the column selection and chart type.     

### MV Recommender
The output is the recommended MVs. An MV is a visualization composed of multiple charts, and each MV is described as a list of charts.

Limitations:
- A chart can encode at most 4 data columns.
- An MV can contain at most 12 charts.
- The predicted chart type is limited to ('area', 'bar', 'scatter', 'line', 'pie').

In [1]:
import pandas as pd
import json
import numpy as np
import itertools
import sys
import re
import altair as alt

import torch
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.autograd import Variable
import torch.nn.functional as nnf

from model.encodingModel import ChartTypeNN, ChartTypeLSTM, ScoreNetLSTM
# from utils.helper import softmax, get_data_feature_by_column, get_embed_feature_by_column, get_all_charts_scores, charts_to_features
from utils.ChartRecommender import ChartRecommender
from utils.VegaLiteRender import VegaLiteRender

In [2]:
## enable IPython's autoreload extension; mode 2 re-imports modules before each
## cell runs, so edits to the project's model/ and utils/ code are picked up
%load_ext autoreload
%autoreload 2

## Load pretrained word-embedding model

In [3]:
word_embedding_model_path = 'utils/en-50d-200000words.vec'

## Build a lookup from word to embedding vector. Each data line of the file is
## whitespace-separated: the word first, followed by its feature values.
word_embedding_dict = {}
## decode explicitly as UTF-8 so parsing does not depend on the platform default
with open(word_embedding_model_path, encoding='utf-8') as file_in:
    next(file_in, None)  ## line 0 is invalid, skip it
    for line in file_in:
        tokens = line.split()
        if not tokens:  ## tolerate blank lines (e.g. a trailing newline at the end of the file)
            continue
        word, *features = tokens
        ## NOTE(review): the features are stored as an array of strings, exactly as
        ## before; presumably the consumer converts them to numbers - confirm.
        word_embedding_dict[word] = np.array(features)

## Load trained single-chart assessment model and chart type prediction model

In [4]:
## Use the first CUDA GPU when one is available, otherwise fall back to the CPU so
## that the notebook also runs on machines without CUDA. The name `gpu` is kept
## because the following cells reference it.
gpu = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

## Single-chart assessment model (scores a column selection).
## map_location remaps the stored tensors onto the selected device at load time.
column_score_model = ScoreNetLSTM(input_size=96, seq_length=4, batch_size=2, pack=True).to(gpu)
column_score_model.load_state_dict(torch.load('trainedModel/singleChartModel.pt', map_location=gpu))
column_score_model.eval()  ## inference mode

## Chart type prediction model.
chart_type_model = ChartTypeLSTM(input_size=96, hidden_size=400, seq_length=4, num_class=9, bidirectional=True).to(gpu)
chart_type_model.load_state_dict(torch.load('trainedModel/chartType.pt', map_location=gpu))
chart_type_model.eval()  ## inference mode (disables the dropout layers); also the cell's displayed value

ChartTypeLSTM(
  (lstm): LSTM(96, 400, batch_first=True, bidirectional=True)
  (dense): Sequential(
    (linear0): Linear(in_features=3200, out_features=2000, bias=True)
    (rulu0): LeakyReLU(negative_slope=0.01)
    (dropout0): Dropout(p=0.4, inplace=False)
    (linear1): Linear(in_features=2000, out_features=1600, bias=True)
    (rulu1): LeakyReLU(negative_slope=0.01)
    (dropout1): Dropout(p=0.4, inplace=False)
    (linear2): Linear(in_features=1600, out_features=1200, bias=True)
    (rulu2): LeakyReLU(negative_slope=0.01)
    (dropout2): Dropout(p=0.4, inplace=False)
    (linear4): Linear(in_features=1200, out_features=9, bias=True)
    (output): ReLU()
  )
)

## Data loader and pre-processing

In [5]:
## read the example dataset and build the recommender from the data table,
## the word embeddings and the two trained single-chart models
df = pd.read_csv('csv/penguins.csv')
chartRecommender = ChartRecommender(
    df,
    word_embedding_dict,
    column_score_model,
    chart_type_model,
)

In [6]:
## the dataset: preview the first rows of the DataFrame held by the recommender
chartRecommender.df.head()

Unnamed: 0,Species,Island,Beak Length (mm),Beak Depth (mm),Flipper Length (mm),Body Mass (g),Sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,MALE
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,FEMALE
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,FEMALE
3,Adelie,Torgersen,,,,,
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,FEMALE


In [7]:
## the fields/columns of the dataset: a dict keyed by column index whose entries
## hold the column's name, index and type (e.g. 'nominal', 'quantitative')
chartRecommender.fields

{0: {'name': 'Species', 'index': 0, 'type': 'nominal'},
 1: {'name': 'Island', 'index': 1, 'type': 'nominal'},
 2: {'name': 'Beak Length (mm)', 'index': 2, 'type': 'quantitative'},
 3: {'name': 'Beak Depth (mm)', 'index': 3, 'type': 'quantitative'},
 4: {'name': 'Flipper Length (mm)', 'index': 4, 'type': 'quantitative'},
 5: {'name': 'Body Mass (g)', 'index': 5, 'type': 'quantitative'},
 6: {'name': 'Sex', 'index': 6, 'type': 'nominal'}}

In [8]:
## feature_dict holds the computed features of every field/column
## (these are what is fed into the DL models); show its container type
print(type(chartRecommender.feature_dict))

## its keys: one entry per field/column index
print(chartRecommender.feature_dict.keys())

## the shape of the feature vector of the first field/column (index 0)
print(np.asarray(chartRecommender.feature_dict[0]).shape)

<class 'dict'>
dict_keys([0, 1, 2, 3, 4, 5, 6])
(96,)


## Single chart recommender
Return a DataFrame:
- indices: the column indices encoded by this chart
- column_selection_score: the predicted score for the column selection, min-max normalized
- chart_type: the chart type ('area', 'bar', 'scatter', 'line', 'pie')
- chart_type_prob: the likelihood that the selected columns are encoded by the chart type
- final_score: the overall score which is column_selection_score * chart_type_prob

In [9]:
## put all candidate charts into a table, then order it so that the
## highest final_score comes first
recommended_charts = pd.DataFrame.from_records(chartRecommender.charts)
recommended_charts = recommended_charts.sort_values(by='final_score', ascending=False)

## show the five best-ranked charts
recommended_charts.head(5)

Unnamed: 0,chart_type,chart_type_prob,column_selection_score,fields,final_score,indices,n_column
104,bar,0.974798,0.953993,"[{'name': 'Species', 'index': 0, 'type': 'nomi...",0.929951,"(0, 1, 6)",3
80,bar,0.76791,0.119861,"[{'name': 'Beak Depth (mm)', 'index': 3, 'type...",0.092042,"(3, 6)",2
71,bar,0.326807,0.110141,"[{'name': 'Beak Length (mm)', 'index': 2, 'typ...",0.035995,"(2, 6)",2
86,bar,0.082556,0.132629,"[{'name': 'Flipper Length (mm)', 'index': 4, '...",0.010949,"(4, 6)",2
89,bar,0.01968,0.132948,"[{'name': 'Body Mass (g)', 'index': 5, 'type':...",0.002616,"(5, 6)",2


In [10]:
## take the best-ranked chart and hand its type, fields and the dataset rows
## to VegaLiteRender
recommend_chart = recommended_charts.iloc[0]
vr = VegaLiteRender(
    chart_type=recommend_chart['chart_type'],
    columns=recommend_chart['fields'],
    data=chartRecommender.df.to_dict('records'),
)

## display the generated spec as an Altair chart
alt.Chart.from_dict(vr.vSpec)

[{'name': 'Species', 'index': 0, 'type': 'nominal'}, {'name': 'Island', 'index': 1, 'type': 'nominal'}, {'name': 'Sex', 'index': 6, 'type': 'nominal'}]


## MV Recommender
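Returns an MV.
- an MV is described as a list of charts (each chart corresponds to one record of the recommended charts table above)
- current_mv: optional; the charts already in the MV, which the recommendation is conditioned on (pass an empty list for an unconditioned recommendation)
- max_charts: total number of charts in the returned MV, including those in current_mv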

In [11]:
## load the trained MV model onto the same device as the single-chart models
mv_model = ScoreNetLSTM(input_size=9, seq_length=12)
mv_model = mv_model.to(gpu)
mv_model.load_state_dict(
    torch.load('trainedModel/mvModel.pt', map_location=gpu)
)
mv_model.eval()

ScoreNetLSTM(
  (lstm): LSTM(9, 200, batch_first=True)
  (linear): Linear(in_features=2400, out_features=1, bias=True)
)

In [12]:
## build a fresh recommender (same arguments as before) for the MV examples
chartRecommender = ChartRecommender(
    df,
    word_embedding_dict,
    column_score_model,
    chart_type_model,
)

In [13]:
## Recommending an MV conditioned on current_mv:
## current_mv lists the charts already chosen, and exactly one more chart is requested
current_mv = [{'indices': (1,), 'chart_type': 'pie'}]
chartRecommender.recommend_mv(
    mv_model,
    current_mv=current_mv,
    max_charts=1 + len(current_mv),
)

[{'indices': (1,), 'chart_type': 'pie'},
 {'indices': (0, 1, 2),
  'fields': [{'name': 'Species', 'index': 0, 'type': 'nominal'},
   {'name': 'Island', 'index': 1, 'type': 'nominal'},
   {'name': 'Beak Length (mm)', 'index': 2, 'type': 'quantitative'}],
  'column_selection_score': 0.5052524224607513,
  'chart_type': 'scatter',
  'chart_type_prob': 5.608765506310378e-23,
  'final_score': 2.833842359077621e-23,
  'n_column': 3}]

In [14]:
## number of entries in chartRecommender.charts (the candidate charts)
len(chartRecommender.charts)

196

In [15]:
## Recommending an MV without conditions: start from an empty MV and ask for 4 charts
chartRecommender.recommend_mv(
    mv_model,
    current_mv=[],
    max_charts=4,
)

[{'indices': (0, 1, 2),
  'fields': [{'name': 'Species', 'index': 0, 'type': 'nominal'},
   {'name': 'Island', 'index': 1, 'type': 'nominal'},
   {'name': 'Beak Length (mm)', 'index': 2, 'type': 'quantitative'}],
  'column_selection_score': 0.5052524224607513,
  'chart_type': 'bar',
  'chart_type_prob': 5.332800072158923e-10,
  'final_score': 2.6944101549571655e-10,
  'n_column': 3},
 {'indices': (0, 1, 3),
  'fields': [{'name': 'Species', 'index': 0, 'type': 'nominal'},
   {'name': 'Island', 'index': 1, 'type': 'nominal'},
   {'name': 'Beak Depth (mm)', 'index': 3, 'type': 'quantitative'}],
  'column_selection_score': 0.4839125316872903,
  'chart_type': 'scatter',
  'chart_type_prob': 4.854947393857243e-22,
  'final_score': 2.349369884570071e-22,
  'n_column': 3},
 {'indices': (0, 1, 4),
  'fields': [{'name': 'Species', 'index': 0, 'type': 'nominal'},
   {'name': 'Island', 'index': 1, 'type': 'nominal'},
   {'name': 'Flipper Length (mm)', 'index': 4, 'type': 'quantitative'}],
  'colum