Skip to content

Commit

Permalink
Merge pull request #444 from goelakash/datapackage.json-format
Browse files Browse the repository at this point in the history
Parsed script files to json
  • Loading branch information
henrykironde committed May 24, 2016
2 parents 85d9e01 + 3356342 commit 536b783
Show file tree
Hide file tree
Showing 34 changed files with 2,130 additions and 2 deletions.
121 changes: 121 additions & 0 deletions lib/parse_script_to_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import json
import os


# Directory that holds the "<name>.script" definition files to convert.
SCRIPT_DIR = "../scripts/"
# Directory that receives the generated "<name>.json" files (currently the
# same directory as the sources).
JSON_DIR = "../scripts/"


def _ensure_table(tables, table_name, replace=None):
    """Return the attribute dict for ``table_name``, creating it if missing.

    A newly created entry records ``str(replace)`` under
    ``'replace_columns'`` when a non-empty ``replace`` list is passed.
    """
    if table_name not in tables:
        if replace:
            tables[table_name] = {'replace_columns': str(replace)}
        else:
            tables[table_name] = {}
    return tables[table_name]


def _parse_definition(lines):
    """Convert the lines of a ``.script`` definition into a JSON-ready dict.

    Each meaningful line has the form ``key: value``.  Blank lines, lines
    starting with ``#`` and lines without a colon are skipped.  Keys that
    start with ``*`` describe the most recently declared ``table``.
    """
    values = {}
    urls = {}
    tables = {}
    last_table = ""
    replace = []
    keys_to_ignore = ["template"]

    for line in (raw.strip() for raw in lines):
        if not line or line[0] == '#' or ':' not in line:
            continue

        # Split on the FIRST colon only, so colons inside the value (URLs,
        # citations such as "reptiles: Ecological ...") keep their spacing.
        raw_key, _, raw_value = line.partition(':')
        key = raw_key.strip().lower()
        value = raw_value.strip()
        if not key:
            # A line such as ": something" carries no usable key.
            continue

        if key == "table":
            # "table: <name>, <url>" -- the URL itself may contain commas.
            name, _, url = value.partition(',')
            last_table = name.strip()
            urls[last_table] = url.strip()
            if replace:
                _ensure_table(tables, last_table, replace)
        elif key == "*nulls":
            if last_table:
                # NOTE(security): eval() runs arbitrary expressions taken
                # from the script file; only use on trusted definitions.
                nulls = [eval(item.strip()) for item in value.split(',')]
                entry = _ensure_table(tables, last_table, replace)
                entry['cleanup'] = (
                    "Cleanup(correct_invalid_value, nulls="
                    + str(nulls) + ")")
        elif key == "replace":
            # "replace: old1, new1; old2, new2"
            replace = [
                (pair.split(',')[0].strip(), pair.split(',')[1].strip())
                for pair in value.split(';')]
        elif key == "tags":
            values["tags"] = [tag.strip() for tag in value.split(',')]
        elif key == "*ct_names":
            if last_table:
                _ensure_table(tables, last_table)["ct_names"] = [
                    name.strip() for name in value.split(',')]
        elif key == "*column":
            if last_table:
                parts = [part.strip() for part in value.split(',')]
                # (column name, (type,)) or (column name, (type, size))
                column = (
                    parts[0],
                    (parts[1], parts[2]) if len(parts) > 2 else (parts[1],))
                entry = _ensure_table(tables, last_table)
                entry.setdefault('columns', []).append(column)
        elif key.startswith("*"):
            # Any other attribute of the most recently declared table.
            if last_table:
                try:
                    # NOTE(security): see the eval() remark above.
                    evaluated = eval(value)
                except Exception:
                    # Not a Python expression: keep the raw text.
                    evaluated = value
                _ensure_table(tables, last_table)[key[1:]] = (
                    "'" + str(evaluated) + "'")
        else:
            values[key] = value

    if 'shortname' not in values and 'name' in values:
        values['shortname'] = values['name']
    values['urls'] = urls
    values['tables'] = tables

    # Legacy definitions use "url" for what the JSON format calls "ref".
    # An explicit "ref" entry is never overwritten.
    if 'url' in values and 'ref' not in values:
        values['ref'] = values.pop('url')
    for ignored in keys_to_ignore:
        values.pop(ignored, None)
    return values


def parse_script_to_json(script_file, script_dir=None, json_dir=None):
    """Convert ``<script_file>.script`` into ``<script_file>.json``.

    Args:
        script_file: Base name of the definition, without the ``.script``
            extension.
        script_dir: Directory containing the ``.script`` file.  Defaults to
            the module-level ``SCRIPT_DIR``.
        json_dir: Directory the ``.json`` file is written to.  Defaults to
            the module-level ``JSON_DIR``.

    The output is written with sorted keys, a four-space indent and a
    trailing newline.  Values of ``*nulls`` and other ``*`` attributes are
    passed through ``eval()``, so only trusted definitions should be
    converted.

    Raises:
        IOError/OSError: if the input cannot be read or the output cannot
            be written.
        IndexError: if a ``replace`` or ``*column`` line has too few
            comma-separated fields.
    """
    if script_dir is None:
        script_dir = SCRIPT_DIR
    if json_dir is None:
        json_dir = JSON_DIR

    # Text mode so the lines are ``str`` on Python 3 as well as Python 2.
    with open(os.path.join(script_dir, script_file + ".script")) as definition:
        values = _parse_definition(definition)

    with open(os.path.join(json_dir, script_file + '.json'), 'w') as json_file:
        json.dump(values, json_file, sort_keys=True, indent=4,
                  separators=(',', ': '))
        json_file.write('\n')

if __name__ == "__main__":
    # Convert every "<name>.script" definition found in SCRIPT_DIR.
    # The suffix check includes the dot so that a file whose name merely
    # ends in "script" is not picked up and mangled.
    script_suffix = ".script"
    for filename in os.listdir(SCRIPT_DIR):
        if filename.endswith(script_suffix):
            parse_script_to_json(filename[:-len(script_suffix)])
115 changes: 115 additions & 0 deletions scripts/AmnioteLH.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
{
"citation": "Myhrvold, N.P., Baldridge, E., Chan, B., Sivam, D., Freeman, D.L. and Ernest, S.M., 2015. An amniote life-history database to perform comparative analyses with birds, mammals, and reptiles:Ecological Archives E096-269. Ecology, 96(11), pp.3109-000.",
"description": "Compilation of life history traits for birds, mammals, and reptiles.",
"name": "Amniote life History database",
"ref": "http://esapubs.org/archive/ecol/E096/269",
"shortname": "AmnioteDB",
"tables": {
"main": {
"cleanup": "Cleanup(correct_invalid_value, nulls=[-999])",
"columns": [
[
"record_id",
[
"pk-auto"
]
],
[
"class",
[
"char",
"20"
]
],
[
"order",
[
"char",
"20"
]
],
[
"family",
[
"char",
"20"
]
],
[
"genus",
[
"char",
"20"
]
],
[
"species",
[
"char",
"20"
]
],
[
"subspecies",
[
"char",
"20"
]
],
[
"common_name",
[
"char",
"40"
]
],
[
"trait_value",
[
"ct-double"
]
]
],
"ct_column": "'trait'",
"ct_names": [
"female_maturity_d",
"litter_or_clutch_size_n",
"litters_or_clutches_per_y",
"adult_body_mass_g",
"maximum_longevity_y",
"gestation_d",
"weaning_d",
"birth_or_hatching_weight_g",
"weaning_weight_g",
"egg_mass_g",
"incubation_d",
"fledging_age_d",
"longevity_y",
"male_maturity_d",
"inter_litter_or_interbirth_interval_y",
"female_body_mass_g",
"male_body_mass_g",
"no_sex_body_mass_g",
"egg_width_mm",
"egg_length_mm",
"fledging_mass_g",
"adult_svl_cm",
"male_svl_cm",
"female_svl_cm",
"birth_or_hatching_svl_cm",
"female_svl_at_maturity_cm",
"female_body_mass_at_maturity_g",
"no_sex_svl_cm",
"no_sex_maturity_d"
],
"delimiter": "','"
}
},
"tags": [
"Taxon > Mammals",
"Data Type > Compilation"
],
"urls": {
"main": "http://esapubs.org/archive/ecol/E096/269/Data_Files/Amniote_Database_Aug_2015.csv"
}
}
12 changes: 12 additions & 0 deletions scripts/Baldridge2012.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"citation": "Baldridge, Elita, A Data-intensive Assessment of the Species Abundance Distribution(2013). All Graduate Theses and Dissertations. Paper 4276.",
"description": "Community abundance data for fish, reptiles, amphibians, beetles, spiders, and birds, compiled from the literature by Elita Baldridge.",
"name": "Miscellaneous Abundance Database (figshare 2012)",
"shortname": "MiscAbundanceDB",
"tables": {},
"urls": {
"citations": "http://files.figshare.com/2023506/Citations_table_abundances.csv",
"main": "http://files.figshare.com/2023547/Species_abundances.csv",
"sites": "http://files.figshare.com/2023504/Sites_table_abundances.csv"
}
}
18 changes: 18 additions & 0 deletions scripts/Dryad_tamburello_etal_2013.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"citation": "Tamburello N, Cote IM, Dulvy NK (2015) Energy and the scaling of animal space use. The American Naturalist 186(2):196-211. http://dx.doi.org/10.1086/682070.",
"description": "Database of mean species masses and corresponding empirically measured home range sizes for 569 vertebrate species from across the globe, including birds, mammals, reptiles, and fishes.",
"name": "Database of Vertebrate Home Range Sizes- Tamburello , et al., 2015",
"ref": "http://datadryad.org/resource/doi:10.5061/dryad.q5j65/1",
"shortname": "HomeRanges",
"tables": {
"ranges": {
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])"
}
},
"tags": [
"Data Type > Compilation"
],
"urls": {
"ranges": "http://datadryad.org/bitstream/handle/10255/dryad.84768/Tamburelloetal_HomeRangeDatabase.csv"
}
}
49 changes: 49 additions & 0 deletions scripts/EA_Chuetal2013.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"citation": "Cover, density, and demographics of shortgrass steppe plants mapped 1997-2010 in permanent grazed and ungrazed quadrats. Chengjin Chu, John Norman, Robert Flynn, Nicole Kaplan, William K. Lauenroth, and Peter B. Adler. Ecology 2013 94:6, 1435-1435.",
"description": "This data set maps and analyzes demographic rates of many common plant species in the shortgrass steppe of North America under grazed and ungrazed conditions.",
"name": "Shortgrass steppe plants.",
"ref": "http://www.esajournals.org/doi/abs/10.1890/13-0121.1",
"shortname": "Steppe_plants_2013",
"tables": {
"allrecords_cover": {
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])"
},
"allrecords_density": {
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])"
},
"daily_climate": {
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])"
},
"quad_info": {
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])"
},
"quad_inventory": {
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])"
},
"quad_stocking_rate": {
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])"
},
"species_list": {
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])"
},
"species_name_changes": {
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])"
}
},
"tags": [
"Taxon > Plants",
"Spatial Scale > Local",
"Data Type > Time Series",
"Data Type > Observational"
],
"urls": {
"allrecords_cover": "http://esapubs.org/archive/ecol/E094/128/allrecords_cover.csv",
"allrecords_density": "http://esapubs.org/archive/ecol/E094/128/allrecords_density.csv",
"daily_climate": "http://esapubs.org/archive/ecol/E094/128/daily_climate_data.csv",
"quad_info": "http://esapubs.org/archive/ecol/E094/128/quad_info.csv",
"quad_inventory": "http://esapubs.org/archive/ecol/E094/128/quad_inventory.csv",
"quad_stocking_rate": "http://esapubs.org/archive/ecol/E094/128/quad_stocking_rate.csv",
"species_list": "http://esapubs.org/archive/ecol/E094/128/species_list.csv",
"species_name_changes": "http://esapubs.org/archive/ecol/E094/128/species_name_changes.csv"
}
}
39 changes: 39 additions & 0 deletions scripts/EA_Colin2014.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{
"citation": "Colin T. Kremer, Jacob P. Gillette, Lars G. Rudstam, Pal Brettum, and Robert Ptacnik. 2014. A compendium of cell and natural unit biovolumes for >1200 freshwater phytoplankton species. Ecology 95:2984.",
"description": "Sampling phytoplankton communities basing on cell size.",
"name": "Biovolumes for freshwater phytoplankton - Colin et al. 2014",
"ref": "http://www.esapubs.org/archive/ecol/E095/257/",
"shortname": "PhytoplankonBiovolume",
"tables": {
"bvd_genus_ag": {
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])"
},
"bvd_genus_raw": {
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])"
},
"bvd_raw": {
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])"
},
"bvd_species_ag": {
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])"
},
"bvd_species_raw": {
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])"
},
"taxa_table": {
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])"
}
},
"tags": [
"Taxon > Phytoplankton",
"Data Type > Compilation"
],
"urls": {
"bvd_genus_ag": "http://esapubs.org/archive/ecol/E095/257/bvd_genus_ag_030614.csv",
"bvd_genus_raw": "http://esapubs.org/archive/ecol/E095/257/bvd_genus_raw_030614.csv",
"bvd_raw": "http://esapubs.org/archive/ecol/E095/257/bvd_raw_052814.csv",
"bvd_species_ag": "http://esapubs.org/archive/ecol/E095/257/bvd_species_ag_030614.csv",
"bvd_species_raw": "http://esapubs.org/archive/ecol/E095/257/bvd_species_raw_030614.csv",
"taxa_table": "http://esapubs.org/archive/ecol/E095/257/taxa_table_030614.csv"
}
}

0 comments on commit 536b783

Please sign in to comment.