Commit
Merge pull request #1521 from henrykironde/bbs50
Update breed-bird-survey-50stop
henrykironde committed Sep 28, 2020
2 parents 5d2006e + 903f3cc commit fe5148b
Showing 2 changed files with 137 additions and 77 deletions.
211 changes: 135 additions & 76 deletions scripts/breed_bird_survey_50stop.py
@@ -38,14 +38,16 @@ def __init__(self, **kwargs):
self.ref = "http://www.pwrc.usgs.gov/BBS/"
self.keywords = ["birds", "continental-scale"]
self.retriever_minimum_version = '2.0.dev'
- self.version = '2.0.0'
+ self.version = '3.0.0'
+ base_url = "https://www.sciencebase.gov/catalog/file/get/5ea04e9a82cefae35a129d65?f=__disk__"
self.urls = {
"counts": "ftp://ftpext.usgs.gov/pub/er/md/laurel/BBS/DataFiles/50-StopData/1997ToPresent_SurveyWide/",
"routes": "ftp://ftpext.usgs.gov/pub/er/md/laurel/BBS/DataFiles/routes.zip",
"weather": "ftp://ftpext.usgs.gov/pub/er/md/laurel/BBS/DataFiles/Weather.zip",
"region_codes": "ftp://ftpext.usgs.gov/pub/er/md/laurel/BBS/DataFiles/RegionCodes.txt",
"species": "ftp://ftpext.usgs.gov/pub/er/md/laurel/BBS/DataFiles/SpeciesList.txt"}

"counts": base_url + "40%2Fe4%2F92%2F40e4925dde30ffd926b1b4d540b485d8a9a320ba",
"routes": base_url + "5d%2Fca%2F74%2F5dca74b1e3e1c21f18443e8f27c38bf0e2b2a234&allowOpen=true",
"weather": base_url + "87%2Fb5%2F1d%2F87b51d999ae1ad18838aa60851e9bcff4498ac8d",
"migrants": base_url + "bf%2Fe5%2Ff6%2Fbfe5f6834f85cc1e31edf67b5eb825b9abff5806",
"Vehicledata": base_url + "a9%2F97%2F2b%2Fa9972b26aaeb48bf9425ed21681312b4cc063a7c",
"species": base_url + "6f%2F16%2F1f%2F6f161fc7c7db1dcaf1259deb02d824700f280460&allowOpen=true",
}
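The new ScienceBase URLs embed percent-encoded blob paths; each %2F is an encoded "/". Not part of the commit, but a quick standard-library check of what one of these paths decodes to:

from urllib.parse import unquote

# %2F decodes to "/", so the f= parameter is a nested blob path.
print(unquote("40%2Fe4%2F92%2F40e4925dde30ffd926b1b4d540b485d8a9a320ba"))
# -> 40/e4/92/40e4925dde30ffd926b1b4d540b485d8a9a320ba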
if parse_version(VERSION) <= parse_version("2.0.0"):
self.shortname = self.name
self.name = self.title
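The parse_version guard above keeps the script working on retriever releases at or below 2.0.0, which used the older shortname/name attributes. Assuming parse_version follows PEP 440 ordering (true of both pkg_resources.parse_version and packaging.version.parse), a sketch of the comparisons involved:

from packaging.version import parse as parse_version

# Dev releases sort before the corresponding final release under PEP 440.
assert parse_version("2.0.dev") < parse_version("2.0.0")
assert parse_version("3.0.0") > parse_version("2.0.0")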
@@ -68,88 +70,146 @@ def download(self, engine=None, debug=False):
engine = self.engine

# Species table
table = Table("species", cleanup=Cleanup(), contains_pk=True,
header_rows=10)

table.columns = [("species_id", ("pk-int",)),
("AOU", ("int",)),
("english_common_name", ("char", 50)),
("french_common_name", ("char", 50)),
("spanish_common_name", ("char", 50)),
("sporder", ("char", 30)),
("family", ("char", 30)),
("genus", ("char", 30)),
("species", ("char", 50)),
]
table = Table("species", cleanup=Cleanup(), contains_pk=True, header_rows=11)
table.columns = [
("species_id", ("pk-int",)),
("AOU", ("int",)),
("english_common_name", ("char", 50)),
("french_common_name", ("char", 50)),
("spanish_common_name", ("char", 50)),
("sporder", ("char", 30)),
("family", ("char", 30)),
("genus", ("char", 30)),
("species", ("char", 50)),
]
table.fixed_width = [7, 6, 51, 51, 51, 51, 51, 51, 50]

engine.table = table
engine.create_table()
engine.insert_data_from_url(self.urls["species"])
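SpeciesList.txt is a fixed-width file, and table.fixed_width lists the field widths in characters. A minimal sketch of the slicing those widths imply (illustration only; the retriever engine's own parser is not shown in this diff):

def slice_fixed_width(line, widths=(7, 6, 51, 51, 51, 51, 51, 51, 50)):
    """Cut one fixed-width record into stripped fields."""
    fields, pos = [], 0
    for width in widths:
        fields.append(line[pos:pos + width].strip())
        pos += width
    return fields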

# Routes table
- engine.download_files_from_archive(
- self.urls["routes"], ["routes.csv"])
+ engine.download_files_from_archive(self.urls["routes"], ["routes.csv"],
+ archive_name="routes.zip")
engine.auto_create_table(Table("routes", cleanup=Cleanup()),
filename="routes.csv")
engine.insert_data_from_file(engine.format_filename("routes.csv"))

# Weather table
- if not os.path.isfile(engine.format_filename("weather_new.csv")):
- engine.download_files_from_archive(self.urls["weather"],
- ["weather.csv"])
- read = open_fr(engine.format_filename("weather.csv"))
- write = open_fw(engine.format_filename("weather_new.csv"))
- print("Cleaning weather data...")
- for line in read:
- values = line.split(',')
- newvalues = []
- for value in values:
- if ':' in value:
- newvalues.append(value.replace(':', ''))
- elif value == "N":
- newvalues.append(None)
- else:
- newvalues.append(value)
- write.write(','.join(str(value) for value in newvalues))
- write.close()
- read.close()
- engine.auto_create_table(Table("weather", pk="RouteDataId",
- cleanup=self.cleanup_func_table),
- filename="weather_new.csv")
+ engine.download_files_from_archive(self.urls["weather"], ["weather.csv"],
+ archive_name="weather.zip")
+ engine.auto_create_table(Table("weather",
+ pk="RouteDataId",
+ cleanup=self.cleanup_func_table),
+ filename="weather.csv")
+ engine.insert_data_from_file(engine.format_filename("weather.csv"))
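For reference, the inline pre-pass removed above stripped colons from time-like values and turned the literal "N" into a missing value before loading. The same logic as a standalone function, reconstructed from the deleted lines:

def clean_weather_value(value):
    """What the removed weather pre-pass did to each comma-separated value."""
    if ':' in value:
        return value.replace(':', '')  # drop colons from time-like fields
    if value == "N":
        return None                    # "N" marked a missing observation
    return value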

+ # Migrations data
+ engine.download_files_from_archive(self.urls["migrants"],
+ archive_name="MigrantNonBreeder.zip")
+ engine.extract_zip(
+ engine.format_filename("MigrantNonBreeder/Migrants.zip"),
+ engine.format_filename("Migrant"),
+ )
+ engine.extract_zip(
+ engine.format_filename("MigrantNonBreeder/MigrantSummary.zip"),
+ engine.format_filename("MigrantSummary"),
+ )
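MigrantNonBreeder.zip contains further zips, hence the two extract_zip calls. extract_zip is a retriever engine helper; judging from the call sites it unpacks the archive at the first path into the directory at the second, roughly like this standard-library sketch (an assumption about its behavior, not the engine's code):

import zipfile

def extract_zip(archive_path, dest_dir):
    # Unpack e.g. MigrantNonBreeder/Migrants.zip into the Migrant directory.
    with zipfile.ZipFile(archive_path) as archive:
        archive.extractall(dest_dir)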

table = Table("migrants", cleanup=Cleanup())
table.columns = [
('routedataid', ('int',)), ('countrynum', ('int',)),
('statenum', ('int',)), ('route', ('int',)), ('rpid', ('int',)),
('year', ('int',)), ('aou', ('int',)), ('stop1', ('int',)),
('stop2', ('int',)), ('stop3', ('int',)), ('stop4', ('int',)),
('stop5', ('int',)), ('stop6', ('int',)), ('stop7', ('int',)),
('stop8', ('int',)), ('stop9', ('int',)), ('stop10', ('int',)),
('stop11', ('int',)), ('stop12', ('int',)), ('stop13', ('int',)),
('stop14', ('int',)), ('stop15', ('int',)), ('stop16', ('int',)),
('stop17', ('int',)), ('stop18', ('int',)), ('stop19', ('int',)),
('stop20', ('int',)), ('stop21', ('int',)), ('stop22', ('int',)),
('stop23', ('int',)), ('stop24', ('int',)), ('stop25', ('int',)),
('stop26', ('int',)), ('stop27', ('int',)), ('stop28', ('int',)),
('stop29', ('int',)), ('stop30', ('int',)), ('stop31', ('int',)),
('stop32', ('int',)), ('stop33', ('int',)), ('stop34', ('int',)),
('stop35', ('int',)), ('stop36', ('int',)), ('stop37', ('int',)),
('stop38', ('int',)), ('stop39', ('int',)), ('stop40', ('int',)),
('stop41', ('int',)), ('stop42', ('int',)), ('stop43', ('int',)),
('stop44', ('int',)), ('stop45', ('int',)), ('stop46', ('int',)),
('stop47', ('int',)), ('stop48', ('int',)), ('stop49', ('int',)),
('stop50', ('int',))
]
engine.table = table
engine.create_table()
engine.insert_data_from_file(engine.format_filename("Migrant/Migrants.csv"))

table = Table("migrantsummary", cleanup=Cleanup())
table.columns = [('routedataid', ('int',)), ('countrynum', ('int',)),
('statenum', ('int',)), ('route', ('int',)),
('rpid', ('int',)), ('year', ('int',)), ('aou', ('int',)),
('count10', ('int',)), ('count20', ('int',)),
('count30', ('int',)), ('count40', ('int',)),
('count50', ('int',)), ('stoptotal', ('int',)),
('speciestotal', ('int',))]
engine.table = table
engine.create_table()
engine.insert_data_from_file(
engine.format_filename("weather_new.csv"))

- # Region_codes table
- table = Table("region_codes", pk=False, header_rows=12,
- fixed_width=[11, 11, 30])
-
- def regioncodes_cleanup(value, engine):
- replace = {
- chr(225): "a",
- chr(233): "e",
- chr(237): "i",
- chr(243): "o"}
- newvalue = str(value)
- for key in list(replace.keys()):
- if key in newvalue:
- newvalue = newvalue.replace(key, replace[key])
- return newvalue
-
- table.cleanup = Cleanup(regioncodes_cleanup)
-
- table.columns = [("countrynum", ("int",)),
- ("regioncode", ("int",)),
- ("regionname", ("char", 30))]
+ engine.format_filename("MigrantSummary/MigrantSummary.csv"))

table = Table("vehicledata", cleanup=Cleanup())
table.columns = [
('routedataid', ('int',)), ('countrynum', ('int',)),
('statenum', ('int',)), ('route', ('int',)), ('rpid', ('int',)),
('year', ('int',)), ('recordedcar', ('char',)), ('car1', ('int',)),
('car2', ('int',)), ('car3', ('int',)), ('car4', ('int',)),
('car5', ('int',)), ('car6', ('int',)), ('car7', ('int',)),
('car8', ('int',)), ('car9', ('int',)), ('car10', ('int',)),
('car11', ('int',)), ('car12', ('int',)), ('car13', ('int',)),
('car14', ('int',)), ('car15', ('int',)), ('car16', ('int',)),
('car17', ('int',)), ('car18', ('int',)), ('car19', ('int',)),
('car20', ('int',)), ('car21', ('int',)), ('car22', ('int',)),
('car23', ('int',)), ('car24', ('int',)), ('car25', ('int',)),
('car26', ('int',)), ('car27', ('int',)), ('car28', ('int',)),
('car29', ('int',)), ('car30', ('int',)), ('car31', ('int',)),
('car32', ('int',)), ('car33', ('int',)), ('car34', ('int',)),
('car35', ('int',)), ('car36', ('int',)), ('car37', ('int',)),
('car38', ('int',)), ('car39', ('int',)), ('car40', ('int',)),
('car41', ('int',)), ('car42', ('int',)), ('car43', ('int',)),
('car44', ('int',)), ('car45', ('int',)), ('car46', ('int',)),
('car47', ('int',)), ('car48', ('int',)), ('car49', ('int',)),
('car50', ('int',)), ('noise1', ('int',)), ('noise2', ('int',)),
('noise3', ('int',)), ('noise4', ('int',)), ('noise5', ('int',)),
('noise6', ('int',)), ('noise7', ('int',)), ('noise8', ('int',)),
('noise9', ('int',)), ('noise10', ('int',)), ('noise11', ('int',)),
('noise12', ('int',)), ('noise13', ('int',)), ('noise14', ('int',)),
('noise15', ('int',)), ('noise16', ('int',)), ('noise17', ('int',)),
('noise18', ('int',)), ('noise19', ('int',)), ('noise20', ('int',)),
('noise21', ('int',)), ('noise22', ('int',)), ('noise23', ('int',)),
('noise24', ('int',)), ('noise25', ('int',)), ('noise26', ('int',)),
('noise27', ('int',)), ('noise28', ('int',)), ('noise29', ('int',)),
('noise30', ('int',)), ('noise31', ('int',)), ('noise32', ('int',)),
('noise33', ('int',)), ('noise34', ('int',)), ('noise35', ('int',)),
('noise36', ('int',)), ('noise37', ('int',)), ('noise38', ('int',)),
('noise39', ('int',)), ('noise40', ('int',)), ('noise41', ('int',)),
('noise42', ('int',)), ('noise43', ('int',)), ('noise44', ('int',)),
('noise45', ('int',)), ('noise46', ('int',)), ('noise47', ('int',)),
('noise48', ('int',)), ('noise49', ('int',)), ('noise50', ('int',))
]
engine.table = table
engine.create_table()
engine.download_files_from_archive(self.urls["Vehicledata"],
archive_name="VehicleData.zip")
engine.extract_zip(
engine.format_filename("VehicleData/VehicleData.zip"),
engine.format_filename("VehicleData"),
)
engine.insert_data_from_file(
engine.format_filename("VehicleData/VehicleData.csv"))

engine.insert_data_from_url(self.urls["region_codes"])

# Counts table
table = Table("counts", pk=False, delimiter=',')
engine.download_files_from_archive(self.urls["counts"],
archive_name="50-StopData.zip")
table.columns = [("RouteDataID", ("int",)),
("countrynum", ("int",)),
("statenum", ("int",)),
@@ -217,15 +277,14 @@ def regioncodes_cleanup(value, engine):
try:
print("Inserting data from part " + part + "...")
try:
"1997ToPresent_SurveyWide"
engine.table.cleanup = Cleanup()
- engine.insert_data_from_archive(self.urls["counts"] +
- "Fifty" + part + ".zip",
- ["fifty" + part + ".csv"])
+ engine.extract_zip(
+ engine.format_filename("50-StopData/1997ToPresent_SurveyWide/Fifty" + part + ".zip"),
+ engine.format_filename("fifty" + part + ".csv"),
+ )
except:
- print(
- "Failed bulk insert on " +
- part +
- ", inserting manually.")
+ print("fifty{}: Failed bulk insert on, inserting manually.".format(part))
engine.connection.rollback()
engine.table.cleanup = self.cleanup_func_clean
engine.insert_data_from_archive(self.urls["counts"] +
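The enclosing try blocks implement a bulk-insert-with-fallback: first attempt the fast path with a plain Cleanup(), and on any failure roll back and reinsert with the stricter self.cleanup_func_clean. (Note that the bare except: also swallows KeyboardInterrupt; except Exception: is the usual idiom.) The shape of the pattern, with the engine calls abstracted into passed-in callables rather than the retriever's actual API:

def insert_with_fallback(part, bulk_insert, careful_insert, rollback):
    """Try the fast bulk path; on failure, roll back and redo carefully."""
    try:
        bulk_insert(part)
    except Exception:
        rollback()
        careful_insert(part)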
3 changes: 2 additions & 1 deletion version.txt
@@ -14,7 +14,7 @@ bird_migration_data.json,1.0.2
bird_size.json,1.2.3
breast_cancer_wi.json,1.2.2
breed_bird_survey.py,4.0.0
- breed_bird_survey_50stop.py,2.0.0
+ breed_bird_survey_50stop.py,3.0.0
breed_bird_survey_nlcd.json,1.0.2
bupa_liver_disorders.json,1.0.0
butterfly_population_network.json,1.2.2
@@ -85,6 +85,7 @@ predator_prey_size_marine.py,2.0.2
predicts.py,1.0.4
prism_climate.py,1.2.3
socean_diet_data.py,1.0.4
+ soil_db.json,1.2.2
sonoran_desert.json,1.0.0
species_exctinction_rates.json,1.0.1
streamflow_conditions.json,1.0.1
