# viz-2a-cars-splom.ipynb

In this notebook we create another scatterplot matrix using the cars dataset you might know from Vega. This time, however, we take the data from outside Elasticsearch and index it using eland.

In [1]:
import datetime
import json

import altair as alt
import eland as ed
import matplotlib.pyplot as plt
import numpy as np
import vega_datasets
from elasticsearch import Elasticsearch

from kibana_vega_util import saveVegaVis
# Turn off Altair's max-rows check on chart data for this session.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [2]:
# Name of the Elasticsearch index the cars data is written to and later queried from.
index_name = 'pandas_to_eland'

In [3]:
# Load the Vega "cars" sample dataset into a pandas DataFrame and print its
# column summary (non-null counts show which columns have missing values).
data = vega_datasets.data
pd_df = data.cars()
pd_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 406 entries, 0 to 405
Data columns (total 9 columns):
Name                406 non-null object
Miles_per_Gallon    398 non-null float64
Cylinders           406 non-null int64
Displacement        406 non-null float64
Horsepower          400 non-null float64
Weight_in_lbs       406 non-null int64
Acceleration        406 non-null float64
Year                406 non-null datetime64[ns]
Origin              406 non-null object
dtypes: datetime64[ns](1), float64(4), int64(2), object(2)
memory usage: 28.7+ KB


In [4]:
# Only rows without missing values are indexed.
pd_df_complete = pd_df.dropna()

# Write the frame to the local Elasticsearch node under `index_name`.
# es_if_exists="replace" overwrites an index left over from a previous run;
# es_refresh=True asks for a refresh once the write has finished.
ed_df = ed.pandas_to_eland(
    pd_df_complete, 'localhost', index_name,
    es_if_exists="replace", es_refresh=True,
)
ed_df.info()

<class 'eland.dataframe.DataFrame'>
Index: 392 entries, 0 to 405
Data columns (total 9 columns):
Acceleration        392 non-null float64
Cylinders           392 non-null int64
Displacement        392 non-null float64
Horsepower          392 non-null float64
Miles_per_Gallon    392 non-null float64
Name                392 non-null object
Origin              392 non-null object
Weight_in_lbs       392 non-null int64
Year                392 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(4), int64(2), object(2)
memory usage: 96.0 bytes


In [5]:
# Query the index over the Elasticsearch REST API; the chart reads its rows
# from the `hits.hits` array of the JSON search response.
url = 'http://localhost:9200/{}/_search?size=1000'.format(index_name)
url_data = alt.Data(
    url=url,
    format=alt.DataFormat(property='hits.hits', type='json'),
)

fields = ed_df.columns

# One calculate expression per column, mapping each plain field name to its
# value under `_source` so the encodings can use the bare column names.
rename_dict = {field: 'datum._source.' + field for field in fields}

# Single scatter panel; its x/y fields are filled in by the repeat below.
base_chart = (
    alt.Chart(url_data)
    .transform_calculate(**rename_dict)
    .mark_circle(size=8)
    .encode(
        alt.X(alt.repeat("column"), type='quantitative'),
        alt.Y(alt.repeat("row"), type='quantitative'),
        color='Origin:N',
    )
    .properties(width=150, height=150)
)

# 3x3 scatterplot matrix over the three numeric fields.
chart = base_chart.repeat(
    row=['Horsepower', 'Acceleration', 'Miles_per_Gallon'],
    column=['Miles_per_Gallon', 'Acceleration', 'Horsepower'],
).interactive()

chart

In [6]:
# Client for the local Elasticsearch node (the same host the chart queries).
es = Elasticsearch([{'host': 'localhost', 'port': 9200}])

# Save the chart as a Kibana visualization under the id 'def-vega-cars-1'.
# NOTE(review): resultSize presumably caps the number of hits fetched and
# mirrors `size=1000` in the chart's query URL; confirm in kibana_vega_util.
saveVegaVis(es, index_name, 'def-vega-cars-1', chart, resultSize=1000)

{'_index': '.kibana_3',
 '_id': 'visualization:def-vega-cars-1',
 '_version': 2,
 'result': 'updated',
 '_shards': {'total': 1, 'successful': 1, 'failed': 0},
 '_seq_no': 392,
 '_primary_term': 2}