# Selecting existng renewable energy sites closer from the industrial sites

In [1]:
from IPython.display import display, HTML

display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import pandas as pd

In [3]:
# importing three nearest substations which has caclulated solar PV
file_path = "C:/Users/USER/Desktop/MSc_Data_Science/Ind_Project/GHD/Project/Data/Nearest_3PS_solarEnergy.csv"

# Load the CSV file into a DataFrame
ps_solar = pd.read_csv(file_path, encoding='latin1')

# Display DataFrame (ps_solar = primary substations solar energy)
ps_solar

Unnamed: 0,OBJECTID,PS_Name,total_energy_output_kWh
0,1,albion st,353145.200
1,2,annie pit,3759156.000
2,3,ardwick,0.000
3,4,arnside,1734.501
4,5,ashton (golborne),13626.050
...,...,...,...
1774,1775,Worfield 33/11kv,195845.800
1775,1776,Worksop West 33 11kv S Stn,49767.140
1776,1777,Worthington 33 11kv S Stn,753286.100
1777,1778,Wrangle 33 11kv S Stn,939024.900


# Importing NearTable run on ArcGIS Pro. It shows the nearest three PS locations from each industrial site

In [5]:
# nt = near table (distance calculated from each industrial site to power substation using ArcGIS Pro 
# 1= nearest, 2= second nearest, 3 = furthest within 3)
# IN_FID = Industrial sites
# NEAR_FID = PS sites
# NEAR_DIST = distance from industrial sites to PS sites

file_path = "C:/Users/USER/Desktop/MSc_Data_Science/Ind_Project/GHD/Project/Data/nearTable_SolarEnergy.csv"

# Load the CSV file into a DataFrame
ps_solar_nt = pd.read_csv(file_path)     

# Display DataFrame
ps_solar_nt

Unnamed: 0,OBJECTID,IN_FID,NEAR_FID,NEAR_DIST,NEAR_RANK
0,1,1,620,385.934067,1
1,2,1,616,1459.495912,2
2,3,1,612,1563.663983,3
3,4,2,1763,641.784439,1
4,5,2,1550,1072.159587,2
...,...,...,...,...,...
3175,3176,1059,778,2224.068208,2
3176,3177,1059,704,2828.328385,3
3177,3178,1060,1771,571.441426,1
3178,3179,1060,1516,2111.129814,2


In [6]:
# Drop the OBJECTID from the nt DataFrame
ps_solar_nt = ps_solar_nt.drop(columns=['OBJECTID'])
ps_solar_nt

Unnamed: 0,IN_FID,NEAR_FID,NEAR_DIST,NEAR_RANK
0,1,620,385.934067,1
1,1,616,1459.495912,2
2,1,612,1563.663983,3
3,2,1763,641.784439,1
4,2,1550,1072.159587,2
...,...,...,...,...
3175,1059,778,2224.068208,2
3176,1059,704,2828.328385,3
3177,1060,1771,571.441426,1
3178,1060,1516,2111.129814,2


In [7]:
# Rename the NEAR_FID column to OBJECTID in the nt DataFrame
# NEAR_FID are OBJECTID of power station's table and IN_FID = OBJECTID of NAEI

ps_solar_nt.rename(columns={'NEAR_FID': 'OBJECTID'}, inplace=True)

ps_solar_nt

Unnamed: 0,IN_FID,OBJECTID,NEAR_DIST,NEAR_RANK
0,1,620,385.934067,1
1,1,616,1459.495912,2
2,1,612,1563.663983,3
3,2,1763,641.784439,1
4,2,1550,1072.159587,2
...,...,...,...,...
3175,1059,778,2224.068208,2
3176,1059,704,2828.328385,3
3177,1060,1771,571.441426,1
3178,1060,1516,2111.129814,2


In [8]:
# Merge the two DataFrames on the OBJECTID column
merged_df = pd.merge(ps_solar, ps_solar_nt, on='OBJECTID', how='inner')

merged_df

Unnamed: 0,OBJECTID,PS_Name,total_energy_output_kWh,IN_FID,NEAR_DIST,NEAR_RANK
0,1,albion st,353145.200,144,3123.620150,2
1,2,annie pit,3759156.000,87,3452.212684,3
2,3,ardwick,0.000,279,592.545660,3
3,3,ardwick,0.000,383,631.967570,2
4,4,arnside,1734.501,170,3591.703375,2
...,...,...,...,...,...,...
3175,1776,Worksop West 33 11kv S Stn,49767.140,531,441.727388,1
3176,1777,Worthington 33 11kv S Stn,753286.100,209,6290.160876,2
3177,1777,Worthington 33 11kv S Stn,753286.100,700,2648.069907,2
3178,1778,Wrangle 33 11kv S Stn,939024.900,77,3611.650649,1


In [9]:
merged_df.columns

Index(['OBJECTID', 'PS_Name', 'total_energy_output_kWh', 'IN_FID', 'NEAR_DIST',
       'NEAR_RANK'],
      dtype='object')

In [14]:
# # Drop the specified columns from the nt DataFrame
# merged_df = merged_df.drop(columns=['PF'])
# merged_df

In [10]:
# Sort the dataframe by IN_FID and then by NEAR_DIST to ensure proper ordering
sorted_df = merged_df.sort_values(by=['IN_FID', 'NEAR_DIST'])

# Add a new rank column based on sorted distances
sorted_df['Distance_Rank'] = sorted_df.groupby('IN_FID')['NEAR_DIST'].rank(method='first').astype(int)

# Pivot the table based on IN_FID and the new Distance_Rank, using NEAR_DIST as the values
piv_df = sorted_df.pivot_table(index='IN_FID', columns='Distance_Rank', values='total_energy_output_kWh', aggfunc='first')

# Rename the columns to Dist1_Headspace, Dist2_Headspace, etc.
piv_df.columns = [f'Dist{int(col)}_total_energy_output_kWh' for col in piv_df.columns]

# Reset the index to bring IN_FID back as a column
piv_df.reset_index(inplace=True)

# Merge the pivoted DataFrame back with the original DataFrame to retain other columns
final_df = pd.merge(merged_df.drop_duplicates(subset=['IN_FID']), piv_df, on='IN_FID', how='left')

# Display the final DataFrame
final_df


Unnamed: 0,OBJECTID,PS_Name,total_energy_output_kWh,IN_FID,NEAR_DIST,NEAR_RANK,Dist1_total_energy_output_kWh,Dist2_total_energy_output_kWh,Dist3_total_energy_output_kWh
0,1,albion st,353145.200,144,3123.620150,2,4143264.0,3.531452e+05,2070968.0
1,2,annie pit,3759156.000,87,3452.212684,3,2570153.0,6.131353e+05,3759156.0
2,3,ardwick,0.000,279,592.545660,3,0.0,0.000000e+00,0.0
3,3,ardwick,0.000,383,631.967570,2,0.0,0.000000e+00,0.0
4,4,arnside,1734.501,170,3591.703375,2,2568242.0,1.734501e+03,225852.0
...,...,...,...,...,...,...,...,...,...
1055,1624,Plymstock South,2418455.000,709,2011.816177,2,7107385.0,2.418455e+06,2418455.0
1056,1633,Quatt 33/11kv,12508.220,661,5671.070399,2,5431456.0,1.250822e+04,195845.8
1057,1660,Sandiacre 33 11kv S Stn,224278.700,210,5544.167160,3,12911910.0,8.981484e+04,224278.7
1058,1682,Spilsby 33 11kv S Stn,842193.300,77,11523.430970,3,939024.9,1.439996e+06,842193.3


In [11]:
# Drop the specified columns from the nt DataFrame
final_df = final_df.drop(columns=['PS_Name', 'total_energy_output_kWh', 'NEAR_RANK', 'NEAR_DIST'])

final_df

Unnamed: 0,OBJECTID,IN_FID,Dist1_total_energy_output_kWh,Dist2_total_energy_output_kWh,Dist3_total_energy_output_kWh
0,1,144,4143264.0,3.531452e+05,2070968.0
1,2,87,2570153.0,6.131353e+05,3759156.0
2,3,279,0.0,0.000000e+00,0.0
3,3,383,0.0,0.000000e+00,0.0
4,4,170,2568242.0,1.734501e+03,225852.0
...,...,...,...,...,...
1055,1624,709,7107385.0,2.418455e+06,2418455.0
1056,1633,661,5431456.0,1.250822e+04,195845.8
1057,1660,210,12911910.0,8.981484e+04,224278.7
1058,1682,77,939024.9,1.439996e+06,842193.3


In [12]:
# To check data types of all columns in a DataFrame
print(final_df.dtypes)


OBJECTID                           int64
IN_FID                             int64
Dist1_total_energy_output_kWh    float64
Dist2_total_energy_output_kWh    float64
Dist3_total_energy_output_kWh    float64
dtype: object


In [13]:
# Creating the new column 'Mean_Headspace(MVA)' by averaging the three headspace columns
final_df['Mean_SolarPV_Potential(kWh)'] = (final_df['Dist1_total_energy_output_kWh'] + final_df['Dist1_total_energy_output_kWh'] + final_df['Dist1_total_energy_output_kWh']) / 3

final_df

Unnamed: 0,OBJECTID,IN_FID,Dist1_total_energy_output_kWh,Dist2_total_energy_output_kWh,Dist3_total_energy_output_kWh,Mean_SolarPV_Potential(kWh)
0,1,144,4143264.0,3.531452e+05,2070968.0,4143264.0
1,2,87,2570153.0,6.131353e+05,3759156.0,2570153.0
2,3,279,0.0,0.000000e+00,0.0,0.0
3,3,383,0.0,0.000000e+00,0.0,0.0
4,4,170,2568242.0,1.734501e+03,225852.0,2568242.0
...,...,...,...,...,...,...
1055,1624,709,7107385.0,2.418455e+06,2418455.0,7107385.0
1056,1633,661,5431456.0,1.250822e+04,195845.8,5431456.0
1057,1660,210,12911910.0,8.981484e+04,224278.7,12911910.0
1058,1682,77,939024.9,1.439996e+06,842193.3,939024.9


In [16]:
# Converting kWh to MWH

final_df['Mean_SolarPV_Potential(MWH)'] = final_df['Mean_SolarPV_Potential(kWh)'] / 1000

final_df

Unnamed: 0,OBJECTID,IN_FID,Dist1_total_energy_output_kWh,Dist2_total_energy_output_kWh,Dist3_total_energy_output_kWh,Mean_SolarPV_Potential(kWh),Mean_SolarPV_Potential(MWH)
0,1,144,4143264.0,3.531452e+05,2070968.0,4143264.0,4143.2640
1,2,87,2570153.0,6.131353e+05,3759156.0,2570153.0,2570.1530
2,3,279,0.0,0.000000e+00,0.0,0.0,0.0000
3,3,383,0.0,0.000000e+00,0.0,0.0,0.0000
4,4,170,2568242.0,1.734501e+03,225852.0,2568242.0,2568.2420
...,...,...,...,...,...,...,...
1055,1624,709,7107385.0,2.418455e+06,2418455.0,7107385.0,7107.3850
1056,1633,661,5431456.0,1.250822e+04,195845.8,5431456.0,5431.4560
1057,1660,210,12911910.0,8.981484e+04,224278.7,12911910.0,12911.9100
1058,1682,77,939024.9,1.439996e+06,842193.3,939024.9,939.0249


In [20]:
# Export the final DataFrame to a CSV file
final_df.to_csv('C:/Users/USER/Desktop/MSc_Data_Science/Ind_Project/GHD/Project/Data/Mean_SolarEnergy_2.csv', index=False)

# Confirm that the file was saved successfully
print("Data exported successfully!")

Data exported successfully!
