# Chart Recommender
This file provides an example of running the chart recommender, including the single-chart recommender and the multiple-chart (MV) recommender.  
The input is a data table given as a `pandas.DataFrame`.  

### Single-Chart Recommender
The output is the recommended charts, described by the column selection and chart type.     

### MV Recommender
The output is the recommended MVs, described as a list of charts.

Limitations:
- A chart can encode at most 4 data columns.
- An MV can contain at most 12 charts.
- The predicted chart type is limited to ('area', 'bar', 'scatter', 'line', 'pie').

In [1]:
import pandas as pd
import json
import numpy as np
import itertools
import sys
import re
import altair as alt

import torch
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.autograd import Variable
import torch.nn.functional as nnf

from model.encodingModel import ChartTypeNN, ChartTypeLSTM, ScoreNetLSTM
# from utils.helper import softmax, get_data_feature_by_column, get_embed_feature_by_column, get_all_charts_scores, charts_to_features
from utils.ChartRecommender import ChartRecommender
from utils.VegaLiteRender import VegaLiteRender

In [2]:
## IPython autoreload in mode 2: imported modules are reloaded before each cell
## runs, so edits to the project sources are picked up without a kernel restart
%load_ext autoreload
%autoreload 2

## Load pretrained word-embedding model

In [3]:
word_embedding_model_path = 'utils/en-50d-200000words.vec'

## Build a {word: embedding vector} lookup from a whitespace-separated text
## file in which every entry line reads "<word> <v1> <v2> ...".
word_embedding_dict = {}
## The encoding is given explicitly so the result does not depend on the
## platform default (assumes the .vec file is UTF-8 encoded -- TODO confirm).
with open(word_embedding_model_path, encoding='utf-8') as file_in:
    next(file_in, None)  ## line 0 is invalid (not a word entry), skip it
    for line in file_in:
        tokens = line.split()
        if not tokens:  ## blank line: nothing to unpack, skip instead of raising
            continue
        word, *features = tokens
        ## NOTE: the values are stored as the strings read from the file,
        ## exactly as before; any numeric conversion happens downstream.
        word_embedding_dict[word] = np.array(features)

## Load trained single-chart assessment model and chart type prediction model

In [None]:
## Use the first CUDA device when one is available, otherwise fall back to the
## CPU so the notebook also runs on machines without CUDA. The name `gpu` is
## kept because later cells reference it.
gpu = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

## single-chart assessment model: load the trained weights onto the device
## and switch to evaluation mode
column_score_model = ScoreNetLSTM(input_size=96, seq_length = 4, batch_size=2, pack = True).to(gpu)
column_score_model.load_state_dict(torch.load('trainedModel/singleChartModel.pt', map_location=gpu))
column_score_model.eval()

## chart type prediction model: same procedure
chart_type_model = ChartTypeLSTM(input_size = 96, hidden_size = 400, seq_length = 4, num_class = 9, bidirectional = True).to(gpu)
chart_type_model.load_state_dict(torch.load('trainedModel/chartType.pt', map_location=gpu))
chart_type_model.eval()

## Data loader and pre-processing

In [None]:
## read the example dataset and hand it to the recommender together with the
## word embeddings and the two trained models
df = pd.read_csv('csv/penguins.csv')
chartRecommender = ChartRecommender(
    df,
    word_embedding_dict,
    column_score_model,
    chart_type_model,
)

In [None]:
## preview the first rows of the dataset held by the recommender
chartRecommender.df.head()

In [None]:
## the fields/columns of the dataset, as exposed by the recommender
chartRecommender.fields

In [None]:
## computed features for each field/column (that are fed into the DL models);
## print the type of the container that holds them
print(type(chartRecommender.feature_dict))

## the keys of the feature container (presumably one key per field/column index -- TODO confirm)
print(chartRecommender.feature_dict.keys())

## the shape of the features stored under key 0
print(np.array(chartRecommender.feature_dict[0]).shape)

## Single chart recommender
Return a DataFrame with the following columns:
- indices: the indices of the columns encoded by this chart
- column_selection_score: the predicted score for the column selection, min-max normalized
- chart_type: the chart type ('area', 'bar', 'scatter', 'line', 'pie')
- chart_type_prob: the likelihood that the selected columns are encoded by the chart type
- final_score: the overall score, computed as column_selection_score * chart_type_prob

In [None]:
## put all chart records into a table, then order it by final_score (best first)
charts_df = pd.DataFrame.from_records(chartRecommender.charts)
recommended_charts = charts_df.sort_values(by='final_score', ascending=False)

## show the five best-ranked charts
recommended_charts.head(5)

In [None]:
## take the best-ranked chart and pass its chart type and fields, plus the
## dataset as a list of row records, to VegaLiteRender
recommend_chart = recommended_charts.iloc[0]
vr = VegaLiteRender(
    chart_type=recommend_chart['chart_type'],
    columns=recommend_chart['fields'],
    data=chartRecommender.df.to_dict('records'),
)

## display the generated spec as an Altair chart
alt.Chart.from_dict(vr.vSpec)

## MV Recommender
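Return an MV.
- an MV is described as a list of charts (each corresponding to a record in `chartRecommender.charts`, shown in the DataFrame above)
- current_mv: optional. The charts already in the MV, on which the recommendation is conditioned; pass an empty list to recommend an MV without conditions.
- max_charts: number of charts in the returned MV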

In [None]:
## load the trained MV model onto the device and switch it to evaluation mode
mv_model = ScoreNetLSTM(input_size=9, seq_length=12).to(gpu)
mv_model.load_state_dict(
    torch.load('trainedModel/mvModel.pt', map_location=gpu)
)
mv_model.eval()

In [None]:
## build the recommender again, with the same arguments as the cell that first created it
chartRecommender = ChartRecommender(df, 
                                    word_embedding_dict, column_score_model, chart_type_model)

In [None]:
## Recommending an MV conditioned on current_mv:
## start from one existing chart and ask for an MV with one chart more
current_mv = [{'indices': (1,), 'chart_type': 'pie'}]
chartRecommender.recommend_mv(
    mv_model,
    current_mv=current_mv,
    max_charts=len(current_mv) + 1,
)

In [None]:
## number of chart records currently held in chartRecommender.charts
len(chartRecommender.charts)

In [None]:
## Recommending an MV without conditions: current_mv is empty and max_charts is 4
chartRecommender.recommend_mv(mv_model, current_mv = [], max_charts = 4)