# AWS S3 support
 * Streams in data that is needed 
 * Useful if you spin down/up an AWS machine

In [None]:
import vaex
import numpy as np
# Taxi dataset stored in an S3 bucket. The `anon=true` query option
# presumably requests anonymous (credential-free) access -- confirm against
# the vaex documentation for cloud storage options.
taxi_s3_url = 's3://vaex/taxi/yellow_taxi_2015_f32s.hdf5?anon=true'
df = vaex.open(taxi_s3_url)

In [None]:
# Show the dataframe: as the last expression of the cell it is rendered by the notebook.
df

In [None]:
# Sum the passenger_count column; progress=True asks for a progress indicator while it runs.
df.passenger_count.sum(progress=True)

# Remote dataframe
 * Data at server
 * State changes at client
 * Server is stateless
    * but does some caching for optimization

In [None]:
# Read the access token from a local file so it is not hard-coded in the
# notebook. The `with` block closes the file handle deterministically instead
# of leaving it to the garbage collector.
with open('token-STSci.txt') as token_file:
    token = token_file.read().strip()
# Connect to the remote vaex server; the token is passed as a URL query parameter.
# NOTE(review): the token travels in a ws:// URL, which looks unencrypted --
# consider wss:// if the server supports it.
df = vaex.open(f'ws://ec2-18-222-183-211.us-east-2.compute.amazonaws.com:9000/gaia_ps1_nochunk?token_trusted={token}')

In [None]:
# Plot `ra` against `dec` for the remote dataframe.
# NOTE(review): f='log' presumably applies a log transform to the plotted values -- confirm in the vaex plotting docs.
df.plot('ra', 'dec', f='log')

In [None]:
# Apply NumPy's degrees-to-radians conversion to the `ra` column.
# NOTE(review): presumably this yields a vaex expression rather than downloaded data -- confirm.
np.deg2rad(df.ra)

# xarray support
 * binby instead of groupby

In [None]:
import vaex
# Columns passed to dropna(column_names=...) when filtering the taxi data.
# NOTE(review): 'pickup_longitude' is not in this list -- confirm that is intentional.
required_coords = ['dropoff_latitude', 'dropoff_longitude', 'pickup_latitude']
# Open the local taxi file and apply the dropna filter in one chained expression.
df = vaex.open('/data/yellow_taxi_2009_2015_f32.hdf5').dropna(column_names=required_coords)
df

In [None]:
# Lookup table from the lower-cased raw payment_type strings to integer codes.
map_payment_type = {
    'csh': 2,
    'crd': 1,
    'cash': 2,
    '1': 1,
    'cas': 2,
    '2': 2,
    'credit': 1,
    'cre': 1,
    'unk': 5,
    'noc': 3,
    'no charge': 3,
    '3': 3,
    'dis': 4,
    'no ': 3,
    '4': 4,
    'dispute': 4,
    'na ': 5,
    '5': 5,
}
# Lower-case the raw strings and translate them through the table; strings that
# are not in the table receive the default code 7.
payment_code = df.payment_type.str.lower().map(
    map_payment_type,
    default_value=7,
    allow_missing=True,
)
# Shift the codes down by one so they start at 0, presumably so each code
# matches the position of its label in the list below -- TODO confirm.
df['payment_type'] = payment_code - 1
payment_labels = ['Credit card', 'Cash', 'No charge', 'Dispute', 'Unknown', 'Voided trip', 'NA']
df.categorize(df.payment_type, labels=payment_labels, check=False)

In [None]:
# Bin by pickup month (first axis) and by payment type (second axis), counting
# the rows in each bin.
month_binner = vaex.groupby.BinnerTime.per_month(df.pickup_datetime)
da = df.binby([month_binner, df.payment_type], agg='count')
da

In [None]:
import numpy as np
import pylab as plt
# Create a large, high-resolution figure for the plot.
plt.figure(figsize=(10, 6), dpi=200)
# Take log10 of the binned counts, then draw one line per payment_type value.
log_counts = np.log10(da)
log_counts.plot(hue='payment_type');