# Programmatically explore our data samples with pandas

In [1]:
import yt
from yt.sample_data.api import get_data_registry_table
# 10 is the numeric value of DEBUG on Python's standard logging scale, so
# debug-level messages from yt are shown in the cells that follow.
yt.set_log_level(10)

yt : [DEBUG    ] 2020-10-17 18:49:30,563 Set log level to 10


Get the dataframe corresponding to https://yt-project.org/data/

Note: the data is actually fetched directly from GitHub (URL below), so it may be slightly more up to date than the yt website.
https://raw.githubusercontent.com/yt-project/website/master/data/datafiles.json

In [2]:
# Fetch the registry of sample datasets as a table: one row per dataset id,
# with its code, human-readable size, download url, byte size and load name.
reg = get_data_registry_table()
# A bare expression on the last line of a cell renders the table as rich output.
reg

Unnamed: 0_level_0,code,size,url,byte size,load name
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ArepoBullet,Arepo,770 MB,http://yt-project.org/data/ArepoBullet.tar.gz,770000000,snapshot_150.hdf5
TNGHalo,Arepo,1012 MB,http://yt-project.org/data/TNGHalo.tar.gz,1012000000,halo_59.hdf5
D9p_500,NMSU-ART,343 MB,http://yt-project.org/data/D9p_500.tar.gz,343000000,10MpcBox_HartGal_csf_a0.500.d
DMonly,NMSU-ART,1 GB,http://yt-project.org/data/DMonly.tar.gz,1000000000,
sizmbhloz-clref04SNth-rs9_a0.9011,ARTIO,93 MB,http://yt-project.org/data/sizmbhloz-clref04SN...,93000000,sizmbhloz-clref04SNth-rs9_a0.9011.art
...,...,...,...,...,...
TipsyAuxiliary,Gasoline,39 MB,http://yt-project.org/data/TipsyAuxiliary.tar.gz,39000000,
medium_tipsy,Gasoline,184 MB,http://yt-project.org/data/medium_tipsy.tar.gz,184000000,g1536.00256
big_tipsy,Gasoline,1.4 GB,http://yt-project.org/data/big_tipsy.tar.gz,1400000000,g1536.00256
SimbaExample,Gizmo,14 GB,http://yt-project.org/data/SimbaExample.tar.gz,14000000000,


In [3]:
# preview, in alphabetical order, the first ten simulation codes that have sample data
print(sorted(reg["code"].drop_duplicates())[:10])

['AHF', 'AMReX', 'ARTIO', 'AdaptaHOP', 'Arepo', 'Athena', 'Athena++', 'Castro', 'Chombo', 'Einstein Toolkit']


In [4]:
# restrict the registry to Enzo sample datasets, smallest download first
reg.loc[reg["code"].eq("Enzo")].sort_values(by="byte size")

Unnamed: 0_level_0,code,size,url,byte size,load name
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ToroShockTube,Enzo,10 kB,http://yt-project.org/data/ToroShockTube.tar.gz,10000,DD0001/data0001
MHDCTOrszagTang,Enzo,170 kB,http://yt-project.org/data/MHDCTOrszagTang.tar.gz,170000,DD0004/data0004
EnzoKelvinHelmholtz,Enzo,4.3 MB,http://yt-project.org/data/EnzoKelvinHelmholtz...,4300000,DD0011/DD0011
ActiveParticleTwoSphere,Enzo,22 MB,http://yt-project.org/data/ActiveParticleTwoSp...,22000000,DD0011/DD0011
ActiveParticleCosmology,Enzo,73 MB,http://yt-project.org/data/ActiveParticleCosmo...,73000000,DD0046/DD0046
IsolatedGalaxy_Gravity,Enzo,177 MB,http://yt-project.org/data/IsolatedGalaxy_Grav...,177000000,galaxy0030/galaxy0030
PopIII_mini,Enzo,290 MB,http://yt-project.org/data/PopIII_mini.tar.gz,290000000,DD0034/DD0034
IsolatedGalaxy,Enzo,292 MB,http://yt-project.org/data/IsolatedGalaxy.tar.gz,292000000,galaxy0030/galaxy0030
enzo_tiny_cosmology,Enzo,419 MB,http://yt-project.org/data/enzo_tiny_cosmology...,419000000,DD0046/DD0046
enzo_cosmology_plus,Enzo,779 MB,http://yt-project.org/data/enzo_cosmology_plus...,779000000,DD0046/DD0046


In [5]:
# load up a small dataset by name; the log below shows it being downloaded and
# unpacked on this first call because it was not yet available locally
ds = yt.load_sample2("ToroShockTube")

yt : [INFO     ] 2020-10-17 18:49:33,299 ToroShockTube is not available locally. Looking up online.
yt : [INFO     ] 2020-10-17 18:49:33,301 Downloading from http://yt-project.org/data/ToroShockTube.tar.gz
Downloading file 'ToroShockTube.tar.gz' from 'https://yt-project.org/data/ToroShockTube.tar.gz' to '/Users/clm/dev/python/yt-project/test_data/yt_download_cache'.
100%|█████████████████████████████████████| 10.2k/10.2k [00:00<00:00, 6.81MB/s]
yt : [INFO     ] 2020-10-17 18:49:33,716 Untaring downloaded file to /Users/clm/dev/python/yt-project/test_data
yt : [INFO     ] 2020-10-17 18:49:33,849 Parameters: current_time              = 0.2
yt : [INFO     ] 2020-10-17 18:49:33,850 Parameters: domain_dimensions         = [100   1   1]
yt : [INFO     ] 2020-10-17 18:49:33,852 Parameters: domain_left_edge          = [0. 0. 0.]
yt : [INFO     ] 2020-10-17 18:49:33,857 Parameters: domain_right_edge         = [1. 1. 1.]
yt : [INFO     ] 2020-10-17 18:49:33,865 Parameters: cosmological_simulation   = 0.0

In [6]:
# Second call: the log tells us that we don't need to download this time!
# Use the same top-level entry point as the first call, so that the only
# difference between the two cells is whether the sample is already on disk.
ds = yt.load_sample2("ToroShockTube")

yt : [INFO     ] 2020-10-17 18:50:02,322 Sample dataset found at /Users/clm/dev/python/yt-project/test_data/ToroShockTube
yt : [INFO     ] 2020-10-17 18:50:02,392 Parameters: current_time              = 0.2
yt : [INFO     ] 2020-10-17 18:50:02,394 Parameters: domain_dimensions         = [100   1   1]
yt : [INFO     ] 2020-10-17 18:50:02,397 Parameters: domain_left_edge          = [0. 0. 0.]
yt : [INFO     ] 2020-10-17 18:50:02,401 Parameters: domain_right_edge         = [1. 1. 1.]
yt : [INFO     ] 2020-10-17 18:50:02,405 Parameters: cosmological_simulation   = 0.0
