## Generate larger versions of the vega datasets

In [2]:
from vega_datasets import data
import pandas as pd
from math import log10
import altair as alt
from pathlib import Path
import json
import vegafusion_jupyter as vf

In [3]:
# Directory that receives every generated dataset file.
outdir = Path("data") / "vega"
# parents=True also creates the intermediate "data" directory; without it
# mkdir raises FileNotFoundError when "data" does not exist yet.
outdir.mkdir(parents=True, exist_ok=True)

### Flights

In [None]:
# Load the flights sample once: every output below is this same frame
# repeated `dups` times, so re-loading it on each iteration is wasted work.
flights_base = data.flights_10k()

for dups in [10, 20, 50, 100, 200, 500, 1000]:
    # Row count of the enlarged dataset.
    # Assumes flights_10k holds 10,000 rows, as its name suggests.
    n = 10000*dups

    # Build a compact size label such as "100k" or "1m": n_commas is the
    # number of complete thousands groups in n and selects the unit suffix.
    n_commas = int(log10(n) // 3)
    suffix = ["", "k", "m"][n_commas]
    filename = "flights_{}{}".format(int(n / 10**(n_commas * 3)), suffix)
    print(filename)

    # Build dataframe. reset_index() replaces the repeated index left by
    # concat with a fresh RangeIndex and keeps the old labels as an
    # "index" column.
    df = pd.concat([flights_base]*dups, axis=0).reset_index()

    # Write json for size up to 1 million rows
    if n <= 1e6:
        with open(outdir / (filename + ".json"), "wt") as f:
            json.dump(alt.data.to_values(df), f)

    # Write feather
    vf.to_feather(df, outdir / (filename + ".feather"))

### Seattle Weather

In [None]:
# Load the Seattle weather sample once: every output below is this same
# frame repeated `dups` times, so re-loading it on each iteration is
# wasted work.
weather_base = data.seattle_weather()

for dups in [7, 14, 35, 69, 137, 343, 685, 1369]:
    # Row count of the enlarged dataset.
    # Assumes seattle_weather holds 1,461 rows -- TODO confirm.
    n = 1461*dups

    # Build a compact size label such as "100k" or "1m": n_commas is the
    # number of complete thousands groups in n and selects the unit suffix.
    # The leading number is truncated, not rounded (e.g. 51,135 -> "51k").
    n_commas = int(log10(n) // 3)
    suffix = ["", "k", "m"][n_commas]
    filename = "seattle_weather_{}{}".format(int(n / 10**(n_commas * 3)), suffix)
    print(filename)

    # Build dataframe. reset_index() replaces the repeated index left by
    # concat with a fresh RangeIndex and keeps the old labels as an
    # "index" column.
    df = pd.concat([weather_base]*dups, axis=0).reset_index()

    # Write json for size up to 1 million rows
    # NOTE(review): the "1m" dataset has 1461*685 = 1,000,785 rows, which is
    # above this threshold, so no seattle_weather_1m.json is written --
    # confirm that is intended.
    if n <= 1e6:
        with open(outdir / (filename + ".json"), "wt") as f:
            json.dump(alt.data.to_values(df), f)

    # Write feather
    vf.to_feather(df, outdir / (filename + ".feather"))