-
Notifications
You must be signed in to change notification settings - Fork 133
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #444 from goelakash/datapackage.json-format
Parsed script files to json
- Loading branch information
Showing
34 changed files
with
2,130 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
import json | ||
import os | ||
|
||
|
||
SCRIPT_DIR = "../scripts/" | ||
JSON_DIR = "../scripts/" | ||
|
||
|
||
def _table_entry(tables, table_name, replace=None):
    """Return the attribute dict for ``table_name``, creating it if missing.

    A newly created entry records ``replace_columns`` (the string form of
    ``replace``) when a non-empty ``replace`` list is supplied.
    """
    if table_name not in tables:
        tables[table_name] = (
            {'replace_columns': str(replace)} if replace else {})
    return tables[table_name]


def _split_csv(text):
    """Split ``text`` on commas and strip whitespace from each item."""
    return [item.strip() for item in text.split(',')]


def _parse_script_lines(lines):
    """Build the JSON-ready dict from the lines of a ``.script`` definition.

    Each useful line has the form ``key: value``.  Blank lines, lines that
    start with ``#`` and lines without a colon are skipped.  Keys starting
    with ``*`` apply to the most recently declared ``table``.
    """
    values = {}
    urls = {}
    tables = {}
    last_table = ""
    replace = []
    keys_to_ignore = ("template",)

    for raw_line in lines:
        line = raw_line.strip()
        if not line or line.startswith('#') or ':' not in line:
            continue

        # Split on the FIRST colon only, so colons inside the value (URLs,
        # "title: subtitle" citations) keep their surrounding whitespace.
        key, _, value = line.partition(':')
        key = key.strip().lower()
        value = value.strip()
        if not key:
            continue

        if key == "table":
            table_name, _, table_url = value.partition(',')
            table_name = table_name.strip()
            last_table = table_name
            urls[table_name] = table_url.strip()
            if replace:
                _table_entry(tables, table_name, replace)
        elif key == "*nulls":
            if last_table:
                # NOTE(security): eval() runs arbitrary expressions taken
                # from the script file; only use with trusted scripts.
                nulls = [eval(item) for item in _split_csv(value)]
                entry = _table_entry(tables, last_table, replace)
                entry['cleanup'] = (
                    "Cleanup(correct_invalid_value, nulls=" + str(nulls) + ")")
        elif key == "replace":
            # "a, b; c, d" -> [("a", "b"), ("c", "d")]; empty chunks (for
            # example after a trailing ';') are ignored.
            replace = [
                (pair.split(',')[0].strip(), pair.split(',')[1].strip())
                for pair in value.split(';') if pair.strip()]
        elif key == "tags":
            values["tags"] = _split_csv(value)
        elif key == "*ct_names":
            if last_table:
                _table_entry(tables, last_table)["ct_names"] = (
                    _split_csv(value))
        elif key == "*column":
            if last_table:
                parts = _split_csv(value)
                # (name, (type, size)) when a size is given, otherwise
                # (name, (type,)).
                column_type = (
                    (parts[1], parts[2]) if len(parts) > 2 else (parts[1],))
                entry = _table_entry(tables, last_table)
                entry.setdefault('columns', []).append(
                    (parts[0], column_type))
        elif key.startswith("*"):
            # attribute that should be applied to the most recently
            # declared table
            attribute = key[1:]
            if last_table:
                # NOTE(security): eval() on script content, see above.
                # Values that are not valid expressions are kept verbatim.
                try:
                    evaluated = eval(value)
                except Exception:
                    evaluated = value
                _table_entry(tables, last_table)[attribute] = (
                    "'" + str(evaluated) + "'")
        else:
            values[key] = value

    if 'shortname' not in values and 'name' in values:
        values['shortname'] = values['name']
    values['urls'] = urls
    values['tables'] = tables

    # Publish "url" under the name "ref"; an explicit "ref" entry wins.
    if 'url' in values:
        url_value = values.pop('url')
        values.setdefault('ref', url_value)

    for ignored in keys_to_ignore:
        values.pop(ignored, None)

    return values


def parse_script_to_json(script_file, script_dir=None, json_dir=None):
    """Convert ``<script_file>.script`` into ``<script_file>.json``.

    Parameters:
        script_file: base name of the script, without the ``.script``
            extension.
        script_dir: directory containing the script; defaults to the
            module-level ``SCRIPT_DIR``.
        json_dir: directory the JSON file is written to; defaults to the
            module-level ``JSON_DIR``.

    The JSON document is written with sorted keys, 4-space indentation and
    a trailing newline.  Nothing is returned.
    """
    if script_dir is None:
        script_dir = SCRIPT_DIR
    if json_dir is None:
        json_dir = JSON_DIR

    # Text mode: the parser works on str, not bytes.
    script_path = os.path.join(script_dir, script_file + ".script")
    with open(script_path, 'r') as definition:
        values = _parse_script_lines(definition)

    json_path = os.path.join(json_dir, script_file + '.json')
    with open(json_path, 'w') as json_file:
        json.dump(values, json_file, sort_keys=True, indent=4,
                  separators=(',', ': '))
        json_file.write('\n')
|
||
if __name__ == "__main__":
    # Convert every "<name>.script" file found in SCRIPT_DIR.  Matching on
    # the real extension skips names that merely end in "script" and keeps
    # the base name intact.
    for entry in os.listdir(SCRIPT_DIR):
        base_name, extension = os.path.splitext(entry)
        if extension == ".script" and base_name:
            parse_script_to_json(base_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
{ | ||
"citation": "Myhrvold, N.P., Baldridge, E., Chan, B., Sivam, D., Freeman, D.L. and Ernest, S.M., 2015. An amniote life-history database to perform comparative analyses with birds, mammals, and reptiles: Ecological Archives E096-269. Ecology, 96(11), pp.3109-000.", | ||
"description": "Compilation of life history traits for birds, mammals, and reptiles.", | ||
"name": "Amniote life History database", | ||
"ref": "http://esapubs.org/archive/ecol/E096/269", | ||
"shortname": "AmnioteDB", | ||
"tables": { | ||
"main": { | ||
"cleanup": "Cleanup(correct_invalid_value, nulls=[-999])", | ||
"columns": [ | ||
[ | ||
"record_id", | ||
[ | ||
"pk-auto" | ||
] | ||
], | ||
[ | ||
"class", | ||
[ | ||
"char", | ||
"20" | ||
] | ||
], | ||
[ | ||
"order", | ||
[ | ||
"char", | ||
"20" | ||
] | ||
], | ||
[ | ||
"family", | ||
[ | ||
"char", | ||
"20" | ||
] | ||
], | ||
[ | ||
"genus", | ||
[ | ||
"char", | ||
"20" | ||
] | ||
], | ||
[ | ||
"species", | ||
[ | ||
"char", | ||
"20" | ||
] | ||
], | ||
[ | ||
"subspecies", | ||
[ | ||
"char", | ||
"20" | ||
] | ||
], | ||
[ | ||
"common_name", | ||
[ | ||
"char", | ||
"40" | ||
] | ||
], | ||
[ | ||
"trait_value", | ||
[ | ||
"ct-double" | ||
] | ||
] | ||
], | ||
"ct_column": "'trait'", | ||
"ct_names": [ | ||
"female_maturity_d", | ||
"litter_or_clutch_size_n", | ||
"litters_or_clutches_per_y", | ||
"adult_body_mass_g", | ||
"maximum_longevity_y", | ||
"gestation_d", | ||
"weaning_d", | ||
"birth_or_hatching_weight_g", | ||
"weaning_weight_g", | ||
"egg_mass_g", | ||
"incubation_d", | ||
"fledging_age_d", | ||
"longevity_y", | ||
"male_maturity_d", | ||
"inter_litter_or_interbirth_interval_y", | ||
"female_body_mass_g", | ||
"male_body_mass_g", | ||
"no_sex_body_mass_g", | ||
"egg_width_mm", | ||
"egg_length_mm", | ||
"fledging_mass_g", | ||
"adult_svl_cm", | ||
"male_svl_cm", | ||
"female_svl_cm", | ||
"birth_or_hatching_svl_cm", | ||
"female_svl_at_maturity_cm", | ||
"female_body_mass_at_maturity_g", | ||
"no_sex_svl_cm", | ||
"no_sex_maturity_d" | ||
], | ||
"delimiter": "','" | ||
} | ||
}, | ||
"tags": [ | ||
"Taxon > Mammals", | ||
"Data Type > Compilation" | ||
], | ||
"urls": { | ||
"main": "http://esapubs.org/archive/ecol/E096/269/Data_Files/Amniote_Database_Aug_2015.csv" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
{ | ||
"citation": "Baldridge, Elita, A Data-intensive Assessment of the Species Abundance Distribution(2013). All Graduate Theses and Dissertations. Paper 4276.", | ||
"description": "Community abundance data for fish, reptiles, amphibians, beetles, spiders, and birds, compiled from the literature by Elita Baldridge.", | ||
"name": "Miscellaneous Abundance Database (figshare 2012)", | ||
"shortname": "MiscAbundanceDB", | ||
"tables": {}, | ||
"urls": { | ||
"citations": "http://files.figshare.com/2023506/Citations_table_abundances.csv", | ||
"main": "http://files.figshare.com/2023547/Species_abundances.csv", | ||
"sites": "http://files.figshare.com/2023504/Sites_table_abundances.csv" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
{ | ||
"citation": "Tamburello N, Cote IM, Dulvy NK (2015) Energy and the scaling of animal space use. The American Naturalist 186(2):196-211. http://dx.doi.org/10.1086/682070.", | ||
"description": "Database of mean species masses and corresponding empirically measured home range sizes for 569 vertebrate species from across the globe, including birds, mammals, reptiles, and fishes.", | ||
"name": "Database of Vertebrate Home Range Sizes- Tamburello , et al., 2015", | ||
"ref": "http://datadryad.org/resource/doi:10.5061/dryad.q5j65/1", | ||
"shortname": "HomeRanges", | ||
"tables": { | ||
"ranges": { | ||
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])" | ||
} | ||
}, | ||
"tags": [ | ||
"Data Type > Compilation" | ||
], | ||
"urls": { | ||
"ranges": "http://datadryad.org/bitstream/handle/10255/dryad.84768/Tamburelloetal_HomeRangeDatabase.csv" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
{ | ||
"citation": "Cover, density, and demographics of shortgrass steppe plants mapped 1997-2010 in permanent grazed and ungrazed quadrats. Chengjin Chu, John Norman, Robert Flynn, Nicole Kaplan, William K. Lauenroth, and Peter B. Adler. Ecology 2013 94:6, 1435-1435.", | ||
"description": "This data set maps and analyzes demographic rates of many common plant species in the shortgrass steppe of North America under grazed and ungrazed conditions.", | ||
"name": "Shortgrass steppe plants.", | ||
"ref": "http://www.esajournals.org/doi/abs/10.1890/13-0121.1", | ||
"shortname": "Steppe_plants_2013", | ||
"tables": { | ||
"allrecords_cover": { | ||
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])" | ||
}, | ||
"allrecords_density": { | ||
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])" | ||
}, | ||
"daily_climate": { | ||
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])" | ||
}, | ||
"quad_info": { | ||
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])" | ||
}, | ||
"quad_inventory": { | ||
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])" | ||
}, | ||
"quad_stocking_rate": { | ||
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])" | ||
}, | ||
"species_list": { | ||
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])" | ||
}, | ||
"species_name_changes": { | ||
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])" | ||
} | ||
}, | ||
"tags": [ | ||
"Taxon > Plants", | ||
"Spatial Scale > Local", | ||
"Data Type > Time Series", | ||
"Data Type > Observational" | ||
], | ||
"urls": { | ||
"allrecords_cover": "http://esapubs.org/archive/ecol/E094/128/allrecords_cover.csv", | ||
"allrecords_density": "http://esapubs.org/archive/ecol/E094/128/allrecords_density.csv", | ||
"daily_climate": "http://esapubs.org/archive/ecol/E094/128/daily_climate_data.csv", | ||
"quad_info": "http://esapubs.org/archive/ecol/E094/128/quad_info.csv", | ||
"quad_inventory": "http://esapubs.org/archive/ecol/E094/128/quad_inventory.csv", | ||
"quad_stocking_rate": "http://esapubs.org/archive/ecol/E094/128/quad_stocking_rate.csv", | ||
"species_list": "http://esapubs.org/archive/ecol/E094/128/species_list.csv", | ||
"species_name_changes": "http://esapubs.org/archive/ecol/E094/128/species_name_changes.csv" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
{ | ||
"citation": "Colin T. Kremer, Jacob P. Gillette, Lars G. Rudstam, Pal Brettum, and Robert Ptacnik. 2014. A compendium of cell and natural unit biovolumes for >1200 freshwater phytoplankton species. Ecology 95:2984.", | ||
"description": "Sampling phytoplankton communities based on cell size.", | ||
"name": "Biovolumes for freshwater phytoplankton - Colin et al. 2014", | ||
"ref": "http://www.esapubs.org/archive/ecol/E095/257/", | ||
"shortname": "PhytoplankonBiovolume", | ||
"tables": { | ||
"bvd_genus_ag": { | ||
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])" | ||
}, | ||
"bvd_genus_raw": { | ||
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])" | ||
}, | ||
"bvd_raw": { | ||
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])" | ||
}, | ||
"bvd_species_ag": { | ||
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])" | ||
}, | ||
"bvd_species_raw": { | ||
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])" | ||
}, | ||
"taxa_table": { | ||
"cleanup": "Cleanup(correct_invalid_value, nulls=['NA'])" | ||
} | ||
}, | ||
"tags": [ | ||
"Taxon > Phytoplankton", | ||
"Data Type > Compilation" | ||
], | ||
"urls": { | ||
"bvd_genus_ag": "http://esapubs.org/archive/ecol/E095/257/bvd_genus_ag_030614.csv", | ||
"bvd_genus_raw": "http://esapubs.org/archive/ecol/E095/257/bvd_genus_raw_030614.csv", | ||
"bvd_raw": "http://esapubs.org/archive/ecol/E095/257/bvd_raw_052814.csv", | ||
"bvd_species_ag": "http://esapubs.org/archive/ecol/E095/257/bvd_species_ag_030614.csv", | ||
"bvd_species_raw": "http://esapubs.org/archive/ecol/E095/257/bvd_species_raw_030614.csv", | ||
"taxa_table": "http://esapubs.org/archive/ecol/E095/257/taxa_table_030614.csv" | ||
} | ||
} |
Oops, something went wrong.