# Specifying data in Altair

In [53]:
import altair as alt
from altair.expr import datum
from vega_datasets import data
import pandas as pd
import numpy as np

In [2]:
# Data source 1: a pandas DataFrame handed directly to alt.Chart.
# The frame gets its own name so that `data` keeps referring to the
# vega_datasets module imported at the top of the notebook.
bars_df = pd.DataFrame({
    'x': ['A', 'B', 'C', 'D', 'E'],
    'y': [5, 3, 6, 7, 2],
})
alt.Chart(bars_df).mark_bar().encode(x='x', y='y')

In [6]:
# Data source 2: the same five bars as inline records wrapped in alt.Data.
# The encodings spell out their types (:O ordinal, :Q quantitative).
# Bound to its own name so the vega_datasets `data` module is not shadowed.
inline_values = alt.Data(values=[
    {'x': 'A', 'y': 5},
    {'x': 'B', 'y': 3},
    {'x': 'C', 'y': 6},
    {'x': 'D', 'y': 7},
    {'x': 'E', 'y': 2},  # was 5; the DataFrame version of this data has E=2
])
alt.Chart(inline_values).mark_bar().encode(
    x='x:O',
    y='y:Q',
)

In [8]:
# (Re)bind `data` to the vega_datasets catalogue before reading from it.
from vega_datasets import data
# URL string of the cars dataset; the next chart is given this URL
# instead of an in-memory DataFrame.
url = data.cars.url

In [10]:
# Data source 3: chart built from the dataset URL, with the field types
# written out in the encoding shorthands.
(
    alt.Chart(url)
    .mark_point()
    .encode(
        x='Horsepower:Q',
        y='Miles_per_Gallon:Q',
    )
)

In [25]:
# Including index data: a seeded random walk indexed by 100 daily dates.
rand = np.random.RandomState(0)

# Own name (not `data`) so the vega_datasets module stays reachable.
walk = pd.DataFrame(
    {'values': rand.randn(100).cumsum()},
    index=pd.date_range('2018', freq='D', periods=100),
)

# reset_index() turns the date index into an ordinary column named
# "index", which is what the x encoding refers to.
# (A bare `.head(5)` in the middle of the cell was dropped: it displayed
# nothing because only the last expression of a cell is rendered.)
alt.Chart(walk.reset_index()).mark_line().encode(x='index:T', y='values:Q')

**long-form vs wide-form data**

In [28]:
# Wide-form: one row per date, one price column per company.
wide_form = pd.DataFrame(dict(
    Date=['2007-10-01', '2007-11-01', '2007-12-01'],
    AAPL=[189.98, 182.34, 198.06],
    AMZN=[89.15, 90.56, 92.64],
    GOOG=[707.00, 693.00, 691.48],
))
print(wide_form)

         Date    AAPL   AMZN    GOOG
0  2007-10-01  189.98  89.15  707.00
1  2007-11-01  182.34  90.56  693.00
2  2007-12-01  198.06  92.64  691.48


In [31]:
# Long-form: one row per (date, company) observation.
long_form = pd.DataFrame(
    [
        ('2007-10-01', 'AAPL', 189.98),
        ('2007-11-01', 'AAPL', 182.34),
        ('2007-12-01', 'AAPL', 198.06),
        ('2007-10-01', 'AMZN', 89.15),
        ('2007-11-01', 'AMZN', 90.56),
        ('2007-12-01', 'AMZN', 92.64),
        ('2007-10-01', 'GOOG', 707.00),
        ('2007-11-01', 'GOOG', 693.00),
        ('2007-12-01', 'GOOG', 691.48),
    ],
    columns=['Date', 'Company', 'prices'],
)
print(long_form)


         Date Company  prices
0  2007-10-01    AAPL  189.98
1  2007-11-01    AAPL  182.34
2  2007-12-01    AAPL  198.06
3  2007-10-01    AMZN   89.15
4  2007-11-01    AMZN   90.56
5  2007-12-01    AMZN   92.64
6  2007-10-01    GOOG  707.00
7  2007-11-01    GOOG  693.00
8  2007-12-01    GOOG  691.48


In [33]:
# Long-form maps directly onto encodings: the Company column drives color.
(
    alt.Chart(long_form)
    .mark_line()
    .encode(
        x='Date:T',
        y='prices:Q',
        color='Company:N',
    )
)

**Converting between long-form and wide-form: Pandas**

In [34]:
# Wide -> long: keep Date as the identifier, stack the remaining columns.
# NOTE(review): var_name is lowercase 'company' while long_form uses
# 'Company' -- confirm which spelling is intended.
wide_form.melt(
    id_vars='Date',
    var_name='company',
    value_name='prices',
)


Unnamed: 0,Date,company,prices
0,2007-10-01,AAPL,189.98
1,2007-11-01,AAPL,182.34
2,2007-12-01,AAPL,198.06
3,2007-10-01,AMZN,89.15
4,2007-11-01,AMZN,90.56
5,2007-12-01,AMZN,92.64
6,2007-10-01,GOOG,707.0
7,2007-11-01,GOOG,693.0
8,2007-12-01,GOOG,691.48


In [37]:
# Long -> wide: one column per Company, then move Date back out of the index.
(
    long_form
    .pivot(index='Date', columns='Company', values='prices')
    .reset_index()
)

Company,Date,AAPL,AMZN,GOOG
0,2007-10-01,189.98,89.15,707.0
1,2007-11-01,182.34,90.56,693.0
2,2007-12-01,198.06,92.64,691.48


**Fold Transform**

In [39]:
# Fold transform: the three company columns are folded into a
# (Company, prices) pair inside the chart spec, so the wide frame can be
# plotted without reshaping it in pandas first.
(
    alt.Chart(wide_form)
    .transform_fold(
        ['AAPL', 'AMZN', 'GOOG'],
        as_=['Company', 'prices'],
    )
    .mark_line()
    .encode(
        x='Date:T',
        y='prices:Q',
        color='Company:N',
    )
)

**Generated data**

In [41]:
# alt.sequence() generator: called with (0, 10, 0.1) and the output field
# named "x". Own name (not `data`) so the vega_datasets module is not shadowed.
x_sequence = alt.sequence(0, 10, 0.1, as_='x')

# y is computed inside the chart spec from the generated x values.
(
    alt.Chart(x_sequence)
    .transform_calculate(y='sin(datum.x)')
    .mark_line()
    .encode(x='x:Q', y='y:Q')
)

**graticule generator**

In [43]:
# Graticule generator with step=[15, 15], drawn on an orthographic
# projection rotated by [0, -45, 0].
# Own name (not `data`) so the vega_datasets module is not shadowed.
graticule_lines = alt.graticule(step=[15, 15])

(
    alt.Chart(graticule_lines)
    .mark_geoshape(stroke='black')
    .project('orthographic', rotate=[0, -45, 0])
)

**Sphere generator**

In [44]:
# Two generated sources: the sphere outline and a 15-degree graticule.
sphere_data = alt.sphere()
grat_data = alt.graticule(step=[15, 15])

# Filled sphere as the first layer, graticule strokes as the second.
background = (
    alt.Chart(sphere_data)
    .mark_geoshape(fill='aliceblue')
)
lines = (
    alt.Chart(grat_data)
    .mark_geoshape(stroke='black')
)

alt.layer(background, lines).project('naturalEarth1')

**Saving altair charts**

In [55]:
from vega_datasets import data


# Build one scatter plot and write it out as JSON and as HTML.
chart = (
    alt.Chart(data.cars.url)
    .mark_point()
    .encode(
        x='Horsepower:Q',
        y='Miles_per_Gallon:Q',
        color='Origin:N',
    )
)
chart.save('chart.json')
chart.save('chart.html')
# The remaining targets are left disabled, as in the original cell
# (presumably they need extra rendering dependencies -- TODO confirm).
#chart.save('chart.pdf')
#chart.save('chart.png')
#chart.save('chart.svg')

# Compound charts

**layered chart**

In [6]:
# Layered chart: a shared base (encodings + GOOG-only filter) is given two
# different marks, and the results are combined with `+`.
stocks = data.stocks.url

base = (
    alt.Chart(stocks)
    .encode(
        x='date:T',
        y='price:Q',
        color='symbol:N',
    )
    .transform_filter(datum.symbol == 'GOOG')
)

base.mark_line() + base.mark_point()

In [7]:
# Same idea through the alt.layer() function, with a third (rule) layer,
# and with interactive() applied to the combined chart.
alt.layer(
    base.mark_line(),
    base.mark_point(),
    base.mark_rule(),
).interactive()

**order of layers**

In [12]:
# Order of layers: a binned heatmap and a scatter of the raw points.
source = data.movies.url

# 2-D binned heatmap. The y channel was missing, which left the rect mark
# without a vertical field; it now bins the same field the scatter layer
# puts on y, so both layers describe the same two axes.
heatmap = alt.Chart(source).mark_rect().encode(
    alt.X('IMDB_Rating:Q', bin=True),
    alt.Y('Rotten_Tomatoes_Rating:Q', bin=True),
    alt.Color('count()', scale=alt.Scale(scheme='greenblue')),
)

points = alt.Chart(source).mark_circle(color='black', size=5).encode(
    x='IMDB_Rating:Q',
    y='Rotten_Tomatoes_Rating:Q',
)

# Heatmap is the first layer, points the second.
heatmap + points

In [13]:
# Same two layers with the operands swapped: points first, heatmap second.
points + heatmap

**horizontal concatenation**

In [17]:
# Horizontal concatenation: a petal scatter next to a binned
# petal-width bar chart, both colored by species.
iris = data.iris.url

chart1 = (
    alt.Chart(iris)
    .mark_point()
    .encode(
        x='petalLength:Q',
        y='petalWidth:Q',
        color='species:N',
    )
    .properties(height=300, width=300)
)

chart2 = (
    alt.Chart(iris)
    .mark_bar()
    .encode(
        x='count()',
        y=alt.Y('petalWidth:Q', bin=alt.Bin(maxbins=30)),
        color='species:N',
    )
    .properties(height=300, width=200)
)

chart1 | chart2

In [18]:
# Function form of horizontal concatenation, applied to the two charts
# defined in the previous cell.
alt.hconcat(chart1,chart2)

**vertical concatenation**

In [19]:
# Vertical concatenation: a detail view on top whose x-domain follows an
# interval selection made on the short strip underneath.
source = data.sp500.url

# Interval selection limited to the x encoding.
brush = alt.selection(type='interval', encodings=['x'])

base = (
    alt.Chart(source)
    .mark_area()
    .encode(x='date:T', y='price:Q')
    .properties(width=600, height=200)
)

# Detail view: x scale domain is bound to the brush.
upper = base.encode(alt.X('date:T', scale=alt.Scale(domain=brush)))

# Strip: same chart at height 60, carrying the brush selection.
lower = base.properties(height=60).add_selection(brush)

alt.vconcat(upper, lower)


**repeated charts**

In [20]:
# Repeated charts, built by hand: a 2x2 grid assembled from a data-less
# base chart; the dataset is attached once on the outer vconcat.
iris = data.iris.url

base = (
    alt.Chart()
    .mark_point()
    .encode(color='species:N')
    .properties(width=200, height=200)
    .interactive()
)

chart = alt.vconcat(data=iris)

for y_encoding in ['petalLength:Q', 'petalWidth:Q']:
    row = alt.hconcat()
    # Columns are the two sepal measurements. The second entry used to be
    # 'petalWidth:Q', which plotted petalWidth against itself and did not
    # match the sepalLength/sepalWidth columns of the repeat() version.
    for x_encoding in ['sepalLength:Q', 'sepalWidth:Q']:
        row |= base.encode(x=x_encoding, y=y_encoding)
    chart &= row
chart


In [21]:
# The same grid via .repeat(): x takes each "column" field, y each "row" field.
(
    alt.Chart(iris)
    .mark_point()
    .encode(
        alt.X(alt.repeat('column'), type='quantitative'),
        alt.Y(alt.repeat('row'), type='quantitative'),
        color='species:N',
    )
    .properties(width=200, height=200)
    .repeat(
        row=['petalLength', 'petalWidth'],
        column=['sepalLength', 'sepalWidth'],
    )
    .interactive()
)

**Faceted charts**

In [31]:
# Faceting by hand: one filtered copy of the base chart per species,
# appended left to right.
iris = data.iris.url

base = (
    alt.Chart(iris)
    .mark_point()
    .encode(
        x='petalLength:Q',
        y='petalWidth:Q',
        color='species:N',
    )
    .properties(width=160, height=160)
)

chart = alt.hconcat()
for species in ['setosa', 'versicolor', 'virginica']:
    chart |= base.transform_filter(datum.species == species)
chart
    

In [30]:
# Faceting with the .facet() method: one column per species.
(
    alt.Chart(iris)
    .mark_point()
    .encode(
        x='petalLength:Q',
        y='petalWidth:Q',
        color='species:N',
    )
    .properties(width=180, height=180)
    .facet(column='species:N')
)

In [32]:
# Faceting through the `column` encoding channel instead of .facet().
(
    alt.Chart(iris)
    .mark_point()
    .encode(
        x='petalLength:Q',
        y='petalWidth:Q',
        color='species:N',
        column='species:N',
    )
    .properties(width=160, height=160)
)

In [33]:
# Faceting a layered chart: points plus a text label that only shows for
# the point selected by hovering.
hover = alt.selection_single(on='mouseover', nearest=True, empty='none')

# Selected point keeps its species color; all others are black.
base = (
    alt.Chart(iris)
    .mark_point()
    .encode(
        x='petalLength:Q',
        y='petalWidth:Q',
        color=alt.condition(hover, 'species:N', alt.value('black')),
    )
    .properties(width=160, height=160)
)

points = base.mark_point().add_selection(hover)

# Label opacity is 1 for the selected point and 0 otherwise.
text = base.mark_text(dy=-5).encode(
    text='species:N',
    opacity=alt.condition(hover, alt.value(1), alt.value(0)),
)

alt.layer(points, text).facet('species:N')


**scale and guide resolution**

In [36]:
# Scale resolution: two copies of one scatter, colored by different fields,
# concatenated without any resolve_* call.
source = data.cars()

base = (
    alt.Chart(source)
    .mark_point()
    .encode(
        x='Horsepower:Q',
        y='Miles_per_Gallon:Q',
    )
    .properties(width=200, height=200)
)

alt.concat(
    base.encode(color='Origin:N'),
    base.encode(color='Cylinders:O'),
)

In [37]:
# Same concatenation, with the color scale resolved as independent.
alt.concat(
    base.encode(color='Origin:N'),
    base.encode(color='Cylinders:O'),
).resolve_scale(color='independent')

**dual y axis**

In [39]:
# Dual y axis: two line layers over a shared yearly x axis, each with its
# own y scale; each axis title is tinted to match its line.
base = alt.Chart(source).encode(alt.X('year(Year):T'))

lineA = base.mark_line(color='#5276A7').encode(
    alt.Y(
        'average(Horsepower):Q',
        axis=alt.Axis(titleColor='#5276A7'),
    )
)

lineB = base.mark_line(color='#F18727').encode(
    alt.Y(
        'average(Miles_per_Gallon):Q',
        axis=alt.Axis(titleColor='#F18727'),
    )
)

alt.layer(lineA, lineB).resolve_scale(y='independent')

In [41]:
# Dual y axis via a fold: both measures are folded into (Measure, Value),
# then each layer filters down to one measure.
base = (
    alt.Chart(source)
    .mark_line()
    .transform_fold(
        ['Horsepower', 'Miles_per_Gallon'],
        as_=['Measure', 'Value'],
    )
    .encode(
        alt.Color('Measure:N'),
        alt.X('year(Year):T'),
    )
)

lineA = base.transform_filter(alt.datum.Measure == 'Horsepower').encode(
    alt.Y('average(Value):Q', axis=alt.Axis(title='Horsepower'))
)

lineB = base.transform_filter(alt.datum.Measure == 'Miles_per_Gallon').encode(
    alt.Y('average(Value):Q', axis=alt.Axis(title='Miles_per_Gallon'))
)

alt.layer(lineA, lineB).resolve_scale(y='independent')

# Date and time in altair

In [2]:
# One row holding the same timestamp twice: without a zone suffix ("local")
# and with a trailing Z ("utc").
df = pd.DataFrame({
    'local': ['2018-01-01T00:00:00'],
    'utc': ['2018-01-01T00:00:00Z'],
})

# `compliant` is true when the two strings yield different hours; the mark
# then reads "OK" in green, otherwise "not OK" in red.
(
    alt.Chart(df)
    .transform_calculate(
        compliant='hours(datum.local) != hours(datum.utc) ? true : false',
    )
    .mark_text(size=20, baseline='middle')
    .encode(
        text=alt.condition(
            'datum.compliant',
            alt.value('OK'),
            alt.value('not OK'),
        ),
        color=alt.condition(
            'datum.compliant',
            alt.value('green'),
            alt.value('red'),
        ),
    )
    .properties(width=80, height=50)
)

**Altair and pandas Date time**

In [3]:
# Load the Seattle temperature table and preview its first five rows.
temps = data.seattle_temps()
temps.head()

Unnamed: 0,date,temp
0,2010-01-01 00:00:00,39.4
1,2010-01-01 01:00:00,39.2
2,2010-01-01 02:00:00,39.0
3,2010-01-01 03:00:00,38.9
4,2010-01-01 04:00:00,38.8


In [4]:
# Column dtypes of the loaded table (shown in the output below).
temps.dtypes

date    datetime64[ns]
temp           float64
dtype: object

In [7]:
# Keep only the rows dated before 2010-01-15. The .copy() makes the subset
# an independent frame, so adding columns to `temps` afterwards does not
# write into a slice of the full table (pandas SettingWithCopyWarning).
temps = temps[temps.date < '2010-01-15'].copy()

alt.Chart(temps).mark_line().encode(x='date:T', y='temp:Q')

In [8]:
# Heatmap of temperature: time of day across, calendar date down.
(
    alt.Chart(temps)
    .mark_rect()
    .encode(
        alt.X('hoursminutes(date):O', title='hour of day'),
        alt.Y('monthdate(date):O', title='date'),
        alt.Color('temp:Q', title='temperature(F)'),
    )
)

**time zone**

In [9]:
# Add a US/Pacific-localized version of `date`. assign() returns a new
# frame instead of writing a column into `temps` in place, which avoids
# SettingWithCopyWarning when `temps` is a filtered subset.
temps = temps.assign(
    date_pacific=temps['date'].dt.tz_localize('US/Pacific'),
)
temps.dtypes

date                        datetime64[ns]
temp                               float64
date_pacific    datetime64[ns, US/Pacific]
dtype: object

In [10]:
# Same heatmap, driven by the timezone-aware date_pacific column.
(
    alt.Chart(temps)
    .mark_rect()
    .encode(
        alt.X('hoursminutes(date_pacific):O', title='hour of day'),
        alt.Y('monthdate(date_pacific):O', title='date'),
        alt.Color('temp:Q', title='temperature(F)'),
    )
)

**Using UTC time**

In [11]:
# UTC view of the Pacific-localized timestamps. The axis titles say UTC,
# so the time units must be the utc* variants; the cell previously used
# the local-time units on the naive `date` column, which repeated the
# first heatmap under UTC labels.
alt.Chart(temps).mark_rect().encode(
    alt.X('utchoursminutes(date_pacific):O', title='UTC hour of day'),
    alt.Y('utcmonthdate(date_pacific):O', title='UTC date'),
    alt.Color('temp:Q', title='temperature(F)'),
)

In [14]:
# Add a copy of `date` localized to UTC. assign() returns a new frame
# rather than writing into `temps` in place, which avoids
# SettingWithCopyWarning when `temps` is a filtered subset.
temps = temps.assign(date_utc=temps['date'].dt.tz_localize('UTC'))

# utc* time units read the hour and date in UTC.
alt.Chart(temps).mark_rect().encode(
    alt.X('utchoursminutes(date_utc):O', title='hour of day'),
    alt.Y('utcmonthdate(date_utc):O', title='date'),
    alt.Color('temp:Q', title='temperature(F)'),
)

# Customizing Visualizations

**Adjusting axis limits**

In [16]:
# Baseline scatter with default axis limits.
cars = data.cars.url

(
    alt.Chart(cars)
    .mark_point()
    .encode(
        x='Acceleration:Q',
        y='Horsepower:Q',
    )
)

In [17]:
# Scale(zero=False) on the x channel.
(
    alt.Chart(cars)
    .mark_point()
    .encode(
        alt.X('Acceleration:Q', scale=alt.Scale(zero=False)),
        y='Horsepower:Q',
    )
)

In [18]:
# Explicit x domain of (5, 20).
(
    alt.Chart(cars)
    .mark_point()
    .encode(
        alt.X('Acceleration:Q', scale=alt.Scale(domain=(5, 20))),
        y='Horsepower:Q',
    )
)

In [19]:
# Same (5, 20) domain, with clip=True set on the mark.
(
    alt.Chart(cars)
    .mark_point(clip=True)
    .encode(
        alt.X('Acceleration:Q', scale=alt.Scale(domain=(5, 20))),
        y='Horsepower:Q',
    )
)

In [20]:
# Same (5, 20) domain, with clamp=True set on the scale; chart is interactive.
(
    alt.Chart(cars)
    .mark_point()
    .encode(
        alt.X('Acceleration:Q', scale=alt.Scale(domain=(5, 20), clamp=True)),
        y='Horsepower:Q',
    )
    .interactive()
)

**Adjusting Axis Labels**

In [23]:
# Small frame of fractions (x) and amounts (y), plotted with default labels.
df = pd.DataFrame({
    'x': [0.03, 0.04, 0.05, 0.12, 0.07, 0.15],
    'y': [10, 35, 39, 50, 24, 35],
})

alt.Chart(df).mark_circle().encode(x='x', y='y')

In [28]:
# Same points with an axis format string and title on each axis.
(
    alt.Chart(df)
    .mark_circle()
    .encode(
        x=alt.X('x', axis=alt.Axis(format='%', title='percentage')),
        y=alt.Y('y', axis=alt.Axis(format='$', title='dollar amount')),
    )
)

**Adjusting the legends**

In [29]:
# Baseline: species mapped to color with the default legend.
iris = data.iris()

(
    alt.Chart(iris)
    .mark_point()
    .encode(
        x='petalWidth',
        y='petalLength',
        color='species',
    )
)

In [30]:
# Custom legend title.
iris = data.iris()

(
    alt.Chart(iris)
    .mark_point()
    .encode(
        x='petalWidth',
        y='petalLength',
        color=alt.Color(
            'species',
            legend=alt.Legend(title='species by color'),
        ),
    )
)

In [31]:
# Legend with orient='left'.
iris = data.iris()

(
    alt.Chart(iris)
    .mark_point()
    .encode(
        x='petalWidth',
        y='petalLength',
        color=alt.Color(
            'species',
            legend=alt.Legend(orient='left'),
        ),
    )
)

In [36]:
# legend=None on the color channel.
iris = data.iris()

(
    alt.Chart(iris)
    .mark_point()
    .encode(
        x='petalWidth',
        y='petalLength',
        color=alt.Color('species', legend=None),
    )
)

**Removing the chart border**

In [37]:
# Baseline chart before any configure_* calls.
iris = data.iris()

(
    alt.Chart(iris)
    .mark_point()
    .encode(
        x='petalWidth',
        y='petalLength',
        color='species',
    )
)

In [38]:
# chart.configure_axis: grid switched off.
iris = data.iris()

(
    alt.Chart(iris)
    .mark_point()
    .encode(
        x='petalWidth',
        y='petalLength',
        color='species',
    )
    .configure_axis(grid=False)
)

In [39]:
# chart.configure_view: grid off and the view's strokeWidth set to 0.
iris = data.iris()

(
    alt.Chart(iris)
    .mark_point()
    .encode(
        x='petalWidth',
        y='petalLength',
        color='species',
    )
    .configure_axis(grid=False)
    .configure_view(strokeWidth=0)
)

In [41]:
# axis=None on both positional channels, on top of the previous settings.
iris = data.iris()

(
    alt.Chart(iris)
    .mark_point()
    .encode(
        alt.X('petalWidth', axis=None),
        alt.Y('petalLength', axis=None),
        color='species',
    )
    .configure_axis(grid=False)
    .configure_view(strokeWidth=0)
)

**Color Scheme**

In [42]:
# Named color scheme ('dark2') on the species color scale.
iris = data.iris()

(
    alt.Chart(iris)
    .mark_point()
    .encode(
        x='petalWidth',
        y='petalLength',
        color=alt.Color('species', scale=alt.Scale(scheme='dark2')),
    )
)

In [43]:
# Color domain/range: species names and the colors paired with them,
# listed in the same order.
iris = data.iris()

domain = ['setosa', 'versicolor', 'virginica']
range_ = ['red', 'green', 'blue']

(
    alt.Chart(iris)
    .mark_point()
    .encode(
        x='petalWidth',
        y='petalLength',
        color=alt.Color(
            'species',
            scale=alt.Scale(domain=domain, range=range_),
        ),
    )
)

In [45]:
# Raw color values: the `color` column already holds color strings, so the
# color channel is encoded with scale=None.
# Own name (not `data`) so the vega_datasets module is not shadowed.
swatches = pd.DataFrame({
    'x': range(6),
    'color': ['red', 'steelblue', 'chartreuse', '#F4D03F', '#D35400', '#7D3C98'],
})

(
    alt.Chart(swatches)
    .mark_point(filled=True, size=100)
    .encode(
        x='x',
        color=alt.Color('color', scale=None),
    )
)

**Adjusting the width of the bar**

In [46]:
# Two-bar frame reused by the next few cells.
# NOTE: this rebinds `data`, hiding the vega_datasets module of the same name.
data = pd.DataFrame({
    'name': ['a', 'b'],
    'value': [4, 10],
})

# Bars with mark size 10.
(
    alt.Chart(data)
    .mark_bar(size=10)
    .encode(x='name:O', y='value:Q')
)

In [47]:
# Bars with mark size 30.
(
    alt.Chart(data)
    .mark_bar(size=30)
    .encode(x='name:O', y='value:Q')
)

In [48]:
# Size-30 bars in a chart of fixed width 200.
(
    alt.Chart(data)
    .mark_bar(size=30)
    .encode(x='name:O', y='value:Q')
    .properties(width=200)
)

In [50]:
# Size-30 bars with the chart width given as alt.Step(100).
(
    alt.Chart(data)
    .mark_bar(size=30)
    .encode(x='name:O', y='value:Q')
    .properties(width=alt.Step(100))
)

**Adjusting chart size**

In [54]:
# Cells above rebind `data` to DataFrames, so on a fresh top-to-bottom run
# `data.cars()` would fail; rebind the name to the vega_datasets module.
from vega_datasets import data

cars = data.cars()

# Explicit chart size through properties(width=..., height=...).
(
    alt.Chart(cars)
    .mark_bar()
    .encode(x='Origin', y='count()')
    .properties(width=200, height=150)
)

In [55]:
# Make sure `data` is the vega_datasets module and not one of the
# DataFrames that earlier cells bound to the same name.
from vega_datasets import data

cars = data.cars()

# With a column facet, width/height are set to 100 each via properties().
(
    alt.Chart(cars)
    .mark_bar()
    .encode(x='Origin', y='count()', column='Cylinders:Q')
    .properties(width=100, height=100)
)