# Selecting existng renewable energy sites closer from the industrial sites

In [5]:
from IPython.display import display, HTML

display(HTML("<style>.container { width:100% !important; }</style>"))

In [6]:
import pandas as pd

In [8]:
# importing renewable point source datasets
file_path = "C:/Users/USER/Desktop/MSc_Data_Science/Ind_Project/GHD/Project/Data/REPD_GB_1.csv"

# Load the CSV file into a DataFrame
repd = pd.read_csv(file_path, encoding='latin1')

# Display DataFrame (repd = renewable energy pointsource data)
repd

Unnamed: 0,OBJECTID,Site Name,Technology Type,Installed Capacity (MWelec),Development Status
0,337,Great Glen Scheme,Large Hydro,36,Operational
1,338,Glendoe Hydro Scheme,Large Hydro,100,Operational
2,340,Fasnakyle Hydro Extension,Large Hydro,7.5,Operational
3,341,Kinlochleven Hydro Power Station,Large Hydro,19.5,Operational
4,342,Gaur,Large Hydro,6.4,Operational
...,...,...,...,...,...
2346,9760,"Biggin Hill Airport, Main Road - Solar PV Array",Solar Photovoltaics,12,Under Construction
2347,9831,"Farnborough International Exhibition Centre, E...",Solar Photovoltaics,1.15,Under Construction
2348,9937,Hare Brewery - Solar Panels,Solar Photovoltaics,0.25,Operational
2349,10069,Jones Food Company - Solar Farm,Solar Photovoltaics,0.71,Operational


# Importing NearTable run on ArcGIS Pro. It shows the nearest three RE locations from each industrial site

In [9]:
# nt = near table (distance calculated from each industrial site to power substation using ArcGIS Pro 
# 1= nearest, 2= second nearest, 3 = furthest within 3)
# IN_FID = Industrial sites
# NEAR_FID = RE sites
# NEAR_DIST = distance from industrial sites to RE sites

file_path = "C:/Users/USER/Desktop/MSc_Data_Science/Ind_Project/GHD/Project/Data/repd_nearTable.csv"

# Load the CSV file into a DataFrame
repd_nt = pd.read_csv(file_path)     

# Display DataFrame
repd_nt

Unnamed: 0,IN_FID,NEAR_FID,NEAR_DIST,NEAR_RANK
0,1,7296,2137.755833,1
1,1,2755,7285.257030,2
2,1,1115,10806.566383,3
3,2,4739,10593.654138,1
4,2,4057,13546.571854,2
...,...,...,...,...
3175,1059,1838,3430.695702,2
3176,1059,4640,3655.379597,3
3177,1060,5198,5394.315990,1
3178,1060,1877,6059.947360,2


In [10]:
# # Drop the OBJECTID from the nt DataFrame
# nt = nt.drop(columns=['Rowid'])
# nt

In [11]:
# Rename the NEAR_FID column to OBJECTID in the nt DataFrame
# NEAR_FID are OBJECTID of power station's table and IN_FID = OBJECTID of NAEI

repd_nt.rename(columns={'NEAR_FID': 'OBJECTID'}, inplace=True)

repd_nt

Unnamed: 0,IN_FID,OBJECTID,NEAR_DIST,NEAR_RANK
0,1,7296,2137.755833,1
1,1,2755,7285.257030,2
2,1,1115,10806.566383,3
3,2,4739,10593.654138,1
4,2,4057,13546.571854,2
...,...,...,...,...
3175,1059,1838,3430.695702,2
3176,1059,4640,3655.379597,3
3177,1060,5198,5394.315990,1
3178,1060,1877,6059.947360,2


In [12]:
# Merge the two DataFrames on the OBJECTID column
merged_df = pd.merge(repd, repd_nt, on='OBJECTID', how='inner')

merged_df

Unnamed: 0,OBJECTID,Site Name,Technology Type,Installed Capacity (MWelec),Development Status,IN_FID,NEAR_DIST,NEAR_RANK
0,346,Striven,Large Hydro,8,Operational,430,15756.589891,3
1,348,Kilmorack,Large Hydro,20,Operational,994,6962.379263,2
2,349,Orrin,Large Hydro,18,Operational,994,4877.812009,1
3,350,Tor Achilty,Large Hydro,15,Operational,994,7993.373818,3
4,361,Beeston Weir Hydro Scheme,Small Hydro,1.7,Operational,305,3152.935140,2
...,...,...,...,...,...,...,...,...
3175,9831,"Farnborough International Exhibition Centre, E...",Solar Photovoltaics,1.15,Under Construction,896,13227.073183,1
3176,9831,"Farnborough International Exhibition Centre, E...",Solar Photovoltaics,1.15,Under Construction,919,2655.045762,1
3177,9831,"Farnborough International Exhibition Centre, E...",Solar Photovoltaics,1.15,Under Construction,987,6178.850783,1
3178,9937,Hare Brewery - Solar Panels,Solar Photovoltaics,0.25,Operational,529,8167.793643,2


In [13]:
merged_df.columns

Index(['OBJECTID', 'Site Name', 'Technology Type',
       'Installed Capacity (MWelec)', 'Development Status', 'IN_FID',
       'NEAR_DIST', 'NEAR_RANK'],
      dtype='object')

In [14]:
# # Drop the specified columns from the nt DataFrame
# merged_df = merged_df.drop(columns=['PF'])
# merged_df

In [15]:
# Sort the dataframe by IN_FID and then by NEAR_DIST to ensure proper ordering
sorted_df = merged_df.sort_values(by=['IN_FID', 'NEAR_DIST'])

# Add a new rank column based on sorted distances
sorted_df['Distance_Rank'] = sorted_df.groupby('IN_FID')['NEAR_DIST'].rank(method='first').astype(int)

# Pivot the table based on IN_FID and the new Distance_Rank, using NEAR_DIST as the values
piv_df = sorted_df.pivot_table(index='IN_FID', columns='Distance_Rank', values='Installed Capacity (MWelec)', aggfunc='first')

# Rename the columns to Dist1_Headspace, Dist2_Headspace, etc.
piv_df.columns = [f'Dist{int(col)}_Installed Capacity (MWelec)' for col in piv_df.columns]

# Reset the index to bring IN_FID back as a column
piv_df.reset_index(inplace=True)

# Merge the pivoted DataFrame back with the original DataFrame to retain other columns
final_df = pd.merge(merged_df.drop_duplicates(subset=['IN_FID']), piv_df, on='IN_FID', how='left')

# Display the final DataFrame
final_df


Unnamed: 0,OBJECTID,Site Name,Technology Type,Installed Capacity (MWelec),Development Status,IN_FID,NEAR_DIST,NEAR_RANK,Dist1_Installed Capacity (MWelec),Dist2_Installed Capacity (MWelec),Dist3_Installed Capacity (MWelec)
0,346,Striven,Large Hydro,8,Operational,430,15756.589891,3,0.34,1.4,8
1,348,Kilmorack,Large Hydro,20,Operational,994,6962.379263,2,18,20,15
2,361,Beeston Weir Hydro Scheme,Small Hydro,1.7,Operational,305,3152.935140,2,0.35,1.7,0.18
3,361,Beeston Weir Hydro Scheme,Small Hydro,1.7,Operational,834,1748.284874,1,1.7,0.35,0.36
4,361,Beeston Weir Hydro Scheme,Small Hydro,1.7,Operational,842,3798.697145,3,0.35,0.18,1.7
...,...,...,...,...,...,...,...,...,...,...,...
1055,6663,"Morrisons, Bradford Road, Idle - Solar PV Panels",Solar Photovoltaics,0.8,Operational,599,5771.357986,1,0.8,<Null>,0.22
1056,6663,"Morrisons, Bradford Road, Idle - Solar PV Panels",Solar Photovoltaics,0.8,Operational,985,8299.490165,2,<Null>,0.8,0.31
1057,7139,Dolphin Square - Solar panels,Solar Photovoltaics,<Null>,Under Construction,555,5904.589571,1,<Null>,0.43,0.18
1058,7262,"Blanche Lane, South Mimms - Solar Panels",Solar Photovoltaics,0.61,Under Construction,698,5699.916929,2,<Null>,0.61,3


In [16]:
# Drop the specified columns from the nt DataFrame
final_df = final_df.drop(columns=['Site Name', 'Technology Type', 'Development Status', 'NEAR_RANK', 'NEAR_DIST'])

final_df

Unnamed: 0,OBJECTID,Installed Capacity (MWelec),IN_FID,Dist1_Installed Capacity (MWelec),Dist2_Installed Capacity (MWelec),Dist3_Installed Capacity (MWelec)
0,346,8,430,0.34,1.4,8
1,348,20,994,18,20,15
2,361,1.7,305,0.35,1.7,0.18
3,361,1.7,834,1.7,0.35,0.36
4,361,1.7,842,0.35,0.18,1.7
...,...,...,...,...,...,...
1055,6663,0.8,599,0.8,<Null>,0.22
1056,6663,0.8,985,<Null>,0.8,0.31
1057,7139,<Null>,555,<Null>,0.43,0.18
1058,7262,0.61,698,<Null>,0.61,3


In [17]:
# To check data types of all columns in a DataFrame
print(final_df.dtypes)


OBJECTID                              int64
Installed Capacity (MWelec)          object
IN_FID                                int64
Dist1_Installed Capacity (MWelec)    object
Dist2_Installed Capacity (MWelec)    object
Dist3_Installed Capacity (MWelec)    object
dtype: object


In [18]:
# Convert a specific column from object to float
final_df['Dist1_Installed Capacity (MWelec)'] = pd.to_numeric(final_df['Dist1_Installed Capacity (MWelec)'], errors='coerce')
final_df['Dist2_Installed Capacity (MWelec)'] = pd.to_numeric(final_df['Dist2_Installed Capacity (MWelec)'], errors='coerce')
final_df['Dist3_Installed Capacity (MWelec)'] = pd.to_numeric(final_df['Dist3_Installed Capacity (MWelec)'], errors='coerce')

final_df

Unnamed: 0,OBJECTID,Installed Capacity (MWelec),IN_FID,Dist1_Installed Capacity (MWelec),Dist2_Installed Capacity (MWelec),Dist3_Installed Capacity (MWelec)
0,346,8,430,0.34,1.40,8.00
1,348,20,994,18.00,20.00,15.00
2,361,1.7,305,0.35,1.70,0.18
3,361,1.7,834,1.70,0.35,0.36
4,361,1.7,842,0.35,0.18,1.70
...,...,...,...,...,...,...
1055,6663,0.8,599,0.80,,0.22
1056,6663,0.8,985,,0.80,0.31
1057,7139,<Null>,555,,0.43,0.18
1058,7262,0.61,698,,0.61,3.00


In [19]:
# Count the number of missing values in each column
final_df.isnull().sum()


OBJECTID                              0
Installed Capacity (MWelec)           0
IN_FID                                0
Dist1_Installed Capacity (MWelec)    64
Dist2_Installed Capacity (MWelec)    34
Dist3_Installed Capacity (MWelec)    37
dtype: int64

In [20]:
# Fill the null PF values with the average of existing numeric PF values
final_df['Dist1_Installed Capacity (MWelec)'].fillna(final_df['Dist1_Installed Capacity (MWelec)'].mean(), inplace=True)
final_df['Dist2_Installed Capacity (MWelec)'].fillna(final_df['Dist2_Installed Capacity (MWelec)'].mean(), inplace=True)
final_df['Dist3_Installed Capacity (MWelec)'].fillna(final_df['Dist3_Installed Capacity (MWelec)'].mean(), inplace=True)

final_df

Unnamed: 0,OBJECTID,Installed Capacity (MWelec),IN_FID,Dist1_Installed Capacity (MWelec),Dist2_Installed Capacity (MWelec),Dist3_Installed Capacity (MWelec)
0,346,8,430,0.340000,1.400000,8.00
1,348,20,994,18.000000,20.000000,15.00
2,361,1.7,305,0.350000,1.700000,0.18
3,361,1.7,834,1.700000,0.350000,0.36
4,361,1.7,842,0.350000,0.180000,1.70
...,...,...,...,...,...,...
1055,6663,0.8,599,0.800000,6.737622,0.22
1056,6663,0.8,985,6.586386,0.800000,0.31
1057,7139,<Null>,555,6.586386,0.430000,0.18
1058,7262,0.61,698,6.586386,0.610000,3.00


In [21]:
# Creating the new column 'Mean_Headspace(MVA)' by averaging the three headspace columns
final_df['Mean_Installed_Capacity(MWelec)'] = (final_df['Dist1_Installed Capacity (MWelec)'] + final_df['Dist2_Installed Capacity (MWelec)'] + final_df['Dist3_Installed Capacity (MWelec)']) / 3

final_df

Unnamed: 0,OBJECTID,Installed Capacity (MWelec),IN_FID,Dist1_Installed Capacity (MWelec),Dist2_Installed Capacity (MWelec),Dist3_Installed Capacity (MWelec),Mean_Installed_Capacity(MWelec)
0,346,8,430,0.340000,1.400000,8.00,3.246667
1,348,20,994,18.000000,20.000000,15.00,17.666667
2,361,1.7,305,0.350000,1.700000,0.18,0.743333
3,361,1.7,834,1.700000,0.350000,0.36,0.803333
4,361,1.7,842,0.350000,0.180000,1.70,0.743333
...,...,...,...,...,...,...,...
1055,6663,0.8,599,0.800000,6.737622,0.22,2.585874
1056,6663,0.8,985,6.586386,0.800000,0.31,2.565462
1057,7139,<Null>,555,6.586386,0.430000,0.18,2.398795
1058,7262,0.61,698,6.586386,0.610000,3.00,3.398795


In [22]:
# the number of hours in a year
hours_in_year = 8760

# Calculate Mean Installed Capacity in MWh
final_df['Mean_Installed_Capacity(MWH)'] = final_df['Mean_Installed_Capacity(MWelec)'] * hours_in_year

final_df


Unnamed: 0,OBJECTID,Installed Capacity (MWelec),IN_FID,Dist1_Installed Capacity (MWelec),Dist2_Installed Capacity (MWelec),Dist3_Installed Capacity (MWelec),Mean_Installed_Capacity(MWelec),Mean_Installed_Capacity(MWH)
0,346,8,430,0.340000,1.400000,8.00,3.246667,28440.800000
1,348,20,994,18.000000,20.000000,15.00,17.666667,154760.000000
2,361,1.7,305,0.350000,1.700000,0.18,0.743333,6511.600000
3,361,1.7,834,1.700000,0.350000,0.36,0.803333,7037.200000
4,361,1.7,842,0.350000,0.180000,1.70,0.743333,6511.600000
...,...,...,...,...,...,...,...,...
1055,6663,0.8,599,0.800000,6.737622,0.22,2.585874,22652.255750
1056,6663,0.8,985,6.586386,0.800000,0.31,2.565462,22473.445783
1057,7139,<Null>,555,6.586386,0.430000,0.18,2.398795,21013.445783
1058,7262,0.61,698,6.586386,0.610000,3.00,3.398795,29773.445783


In [23]:
# Check for null values in the dataframe
null_values = final_df.isnull().sum()

null_values


OBJECTID                             0
Installed Capacity (MWelec)          0
IN_FID                               0
Dist1_Installed Capacity (MWelec)    0
Dist2_Installed Capacity (MWelec)    0
Dist3_Installed Capacity (MWelec)    0
Mean_Installed_Capacity(MWelec)      0
Mean_Installed_Capacity(MWH)         0
dtype: int64

In [24]:
# Export the final DataFrame to a CSV file
final_df.to_csv('C:/Users/USER/Desktop/MSc_Data_Science/Ind_Project/GHD/Project/Data/RE_mean_capacity.csv', index=False)

# Confirm that the file was saved successfully
print("Data exported successfully!")

Data exported successfully!
