Skip to content

Commit

Permalink
Add WEPP Location ID to database (PLATTS ID)
Browse files Browse the repository at this point in the history
Addressing part of Issue #6, add a new field (`wepp_id`) to the output
database. This field holds a reference to a unique plant identifier in
the widely-used PLATTS-WEPP dataset. This should allow for
intercomparisons with an independently derived dataset, and will be
helpful to users who want to build a composite dataset for their
analyses.

The `wepp_id` field is a delimited field, with LOCATIONID values
separated by the pipe character (|) if multiple LOCATIONIDs match to a
single plant.

With this update, there are 16460 plants with known `wepp_id`s, which
account for 4312.6 GW in total (about 80% of all capacity in the output
database).

There are no changes to plants in the database except for the addition
of this `wepp_id`.

Database Version: 1.1.10
  • Loading branch information
loganbyers committed Dec 10, 2018
1 parent 54fec91 commit 383d6b3
Show file tree
Hide file tree
Showing 29 changed files with 2,003,638 additions and 1,873,151 deletions.
8 changes: 6 additions & 2 deletions build_databases/build_global_power_plant_database.py
Expand Up @@ -5,11 +5,10 @@
Builds the Global Power Plant Database from various data sources.
- Log build to DATABASE_BUILD_LOG_FILE
- Use country and fuel information as specified in powerplant_database.py
- Use matches/concordances as specified in powerplant_database.py
TO-DOS:
- Alias list for power plants
- Primary fuel type designation
- Link to Platts/WEPP ID
"""

import csv
Expand Down Expand Up @@ -196,6 +195,11 @@
estimated_plants = pw.estimate_generation(core_database)
print('...estimated for {0} plants.'.format(estimated_plants))

# STEP 4.1: Add WEPP ID matches
pw.add_wepp_id(core_database)
if DATA_DUMP:
pw.add_wepp_id(datadump)

# STEP 5: Write the Global Power Plant Database
for dbname, data in database_additions.iteritems():
print("Added {0} plants ({1} MW) from {2}.".format(data['count'], data['capacity'], dbname))
Expand Down
2 changes: 1 addition & 1 deletion output_database/DATABASE_VERSION
@@ -1,2 +1,2 @@
1.1.9
1.1.10

59,364 changes: 29,682 additions & 29,682 deletions output_database/global_power_plant_database.csv

Large diffs are not rendered by default.

426 changes: 213 additions & 213 deletions output_database/global_power_plant_database_country_summary.csv

Large diffs are not rendered by default.

51 changes: 48 additions & 3 deletions powerplant_database.py
Expand Up @@ -32,6 +32,7 @@
COUNTRY_NAMES_THESAURUS_FILE = os.path.join(RESOURCES_DIR, "country_names_thesaurus.csv")
COUNTRY_INFORMATION_FILE = os.path.join(RESOURCES_DIR, "country_information.csv")
MASTER_PLANT_CONCORDANCE_FILE = os.path.join(RESOURCES_DIR, "master_plant_concordance.csv")
WEPP_CONCORDANCE_FILE = os.path.join(RESOURCES_DIR, "master_wepp_concordance.csv")
SOURCE_THESAURUS_FILE = os.path.join(RESOURCES_DIR, "sources_thesaurus.csv")
GENERATION_FILE = os.path.join(RESOURCES_DIR, "generation_by_country_by_fuel_2014.csv")

Expand Down Expand Up @@ -59,15 +60,17 @@ def __init__(self, plant_idnr, plant_name, plant_country,
plant_other_fuel=NO_DATA_SET,
plant_generation=NO_DATA_OTHER,
plant_commissioning_year=NO_DATA_NUMERIC,
plant_estimated_generation_gwh=NO_DATA_NUMERIC
plant_estimated_generation_gwh=NO_DATA_NUMERIC,
plant_wepp_id=NO_DATA_UNICODE
):

# check and set data for attributes that should be unicode
unicode_attributes = {
'idnr': plant_idnr, 'name': plant_name, 'country': plant_country,
'owner': plant_owner, 'nat_lang': plant_nat_lang,
'url': plant_source_url, 'coord_source': plant_coord_source,
'primary_fuel': plant_primary_fuel
'primary_fuel': plant_primary_fuel,
'wepp_id': plant_wepp_id
}

for attribute, input_parameter in unicode_attributes.iteritems():
Expand Down Expand Up @@ -819,6 +822,41 @@ def make_plant_concordance(master_plant_condordance_file=MASTER_PLANT_CONCORDANC
}
return plant_concordance

def add_wepp_id(powerplant_dictionary, wepp_matches_file=None):
    """
    Set WEPP Location ID for each plant, if a match is available.
    Modifies powerplant_dictionary in place.
    Parameters
    ----------
    powerplant_dictionary : dict
        Dictionary of all PowerPlant objects, keyed by gppd_idnr.
    wepp_matches_file : path, optional
        Path to file with WEPP Location ID matches; defaults to
        WEPP_CONCORDANCE_FILE.
    Returns
    -------
    None.
    """
    if wepp_matches_file is None:
        # Resolve the module-level default lazily; evaluating it in the
        # signature would raise at import time if the constant is missing.
        wepp_matches_file = WEPP_CONCORDANCE_FILE
    wepp_match_count = 0
    # NOTE: was 'rbU' — a deprecated binary/universal-newlines combination
    # that is rejected on Python 3. csv.DictReader only needs text mode.
    with open(wepp_matches_file, 'r') as f:
        csvreader = csv.DictReader(f)
        for row in csvreader:
            # skip concordance rows that carry no WEPP match
            if not row['wepp_location_id']:
                continue
            gppd_id = str(row['gppd_idnr'])
            wepp_id = str(row['wepp_location_id'])
            if gppd_id not in powerplant_dictionary:
                print(u"Error: Attempt to match WEPP ID {0} to non-existent plant {1}".format(wepp_id, gppd_id))
                continue
            # test that we haven't already set this wepp id; the attribute
            # access is the only operation here that can raise
            try:
                already_set = bool(powerplant_dictionary[gppd_id].wepp_id)
            except AttributeError:
                print(u"Error: plant {0} does not have wepp_id attribute".format(gppd_id))
                continue
            if already_set:
                print(u"Error: Duplicate WEPP match for plant {0}".format(gppd_id))
            else:
                powerplant_dictionary[gppd_id].wepp_id = wepp_id
                wepp_match_count += 1
    print(u"Added {0} matches to WEPP plants.".format(wepp_match_count))

### STRING CLEANING ###

def format_string(value, encoding=UNICODE_ENCODING):
Expand Down Expand Up @@ -1097,6 +1135,7 @@ def _dict_row(powerplant):
ret['latitude'] = NO_DATA_NUMERIC
ret['longitude'] = NO_DATA_NUMERIC
ret['geolocation_source'] = powerplant.coord_source.encode(UNICODE_ENCODING)
ret['wepp_id'] = powerplant.wepp_id.encode(UNICODE_ENCODING)
ret['commissioning_year'] = powerplant.commissioning_year
# handle fuel
ret['primary_fuel'] = powerplant.primary_fuel
Expand Down Expand Up @@ -1140,6 +1179,7 @@ def _dict_row(powerplant):
"source",
"url",
"geolocation_source",
"wepp_id",
"year_of_capacity_data",
"generation_gwh_2013",
"generation_gwh_2014",
Expand Down Expand Up @@ -1234,6 +1274,9 @@ def read_csv_file_to_dict(filename):
# check if geolocation source is empty string
if not row['geolocation_source']:
row['geolocation_source'] = None
# check if wepp_id is empty string
if not row['wepp_id']:
row['wepp_id'] = None
# add row to output dict
pdb[row['gppd_idnr']] = row
return pdb
Expand Down Expand Up @@ -1291,6 +1334,7 @@ def write_sqlite_file(plants_dict, filename, return_connection=False):
source TEXT,
url TEXT,
geolocation_source TEXT,
wepp_id TEXT,
year_of_capacity_data INTEGER,
generation_gwh_2013 REAL,
generation_gwh_2014 REAL,
Expand All @@ -1304,7 +1348,7 @@ def write_sqlite_file(plants_dict, filename, return_connection=False):
c.execute('begin')
for k, p in plants_dict.iteritems():
stmt = u'''INSERT INTO powerplants VALUES (
?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'''
?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'''
vals = (
p['country'],
p['country_long'],
Expand All @@ -1322,6 +1366,7 @@ def write_sqlite_file(plants_dict, filename, return_connection=False):
p['source'],
p['url'],
p['geolocation_source'],
p['wepp_id'],
p['year_of_capacity_data'],
p['generation_gwh_2013'],
p['generation_gwh_2014'],
Expand Down

0 comments on commit 383d6b3

Please sign in to comment.