In [21]:
import pandas as pd
import numpy as np
from shapely.geometry import Point
import geopandas as gpd

In [22]:
def haversine_vec(lat1, lon1, lat2, lon2):
    # lat1/lon1 shape (N,1), lat2/lon2 shape (1,M) -> result (N,M)
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat/2)**2 + np.cos(lat1)*np.cos(lat2)*np.sin(dlon/2)**2
    return 2 * 6371 * np.arcsin(np.sqrt(a))

def grid_calculate(main_centroid, secondary_centroid):

   # Inputs: secondary_centroid (M x 3), main_centroid (N x 3) as above
    lat_g = main_centroid["lat"].to_numpy()[:, None]  # (N,1)
    lon_g = main_centroid["lon"].to_numpy()[:, None]  # (N,1)
    lat_s = secondary_centroid["lat"].to_numpy()[None, :]  # (1,M)
    lon_s = secondary_centroid["lon"].to_numpy()[None, :]  # (1,M)

    D = haversine_vec(lat_g, lon_g, lat_s, lon_s)  # (N,M) distances
    idx_min = np.argmin(D, axis=1)                # (N,) nearest plant index
    dist_min = D[np.arange(D.shape[0]), idx_min]  # (N,)

    main_centroid["nearest_plant_id"]  = secondary_centroid["plant_id"].to_numpy()[idx_min]
    main_centroid["nearest_plant_lat"] = secondary_centroid["lat"].to_numpy()[idx_min]
    main_centroid["nearest_plant_lon"] = secondary_centroid["lon"].to_numpy()[idx_min]
    main_centroid["distance_km"]       = dist_min

    dmin, dmax = dist_min.min(), dist_min.max()
    main_centroid["score"] = 1.0 if dmax == dmin else 1 - (dist_min - dmin) / (dmax - dmin)
    return main_centroid




In [None]:
def convert_geojson(df, path):
    # 2) Make sure lon/lat are numeric
    df['lon'] = pd.to_numeric(df['lon'], errors='coerce')
    df['lat'] = pd.to_numeric(df['lat'], errors='coerce')

    # 3) Create GeoDataFrame (points from lon/lat)
    gdf = gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df['lon'], df['lat']),
        crs="EPSG:4326"  # WGS84 lon/lat
    )
    gdf.to_file(path, layer="grid", driver="GPKG")


In [24]:
solar_path = '../data/centroid_solarPV.csv'
solar_centroid = pd.read_csv(solar_path)

grid_path = '../data/centroid_grid.csv'   # <- looks like you meant a different file
grid_centroid = pd.read_csv(grid_path)

station_path = '../data/centroid_stacje_lowerSilesian.csv'
station_centroid = pd.read_csv(station_path)

In [26]:
calculated_grid_solar_centroid = grid_calculate(grid_centroid, solar_centroid)

calculated_grid_station_centroid = grid_calculate(grid_centroid, station_centroid)

In [31]:
path_1= '../output/calculate_grid_pv_centroids.gpkg'
convert_geojson(calculated_grid_solar_centroid,path_1)

In [32]:
calculated_grid_solar_centroid

Unnamed: 0,grid_id,lon,lat,nearest_plant_id,nearest_plant_lat,nearest_plant_lon,distance_km,score
0,1,14.818255,50.881762,351,50.918983,14.872268,5.610491,0.801238
1,2,14.818044,50.880391,351,50.918983,14.872268,5.733665,0.796873
2,3,14.818197,50.878770,351,50.918983,14.872268,5.862871,0.792294
3,4,14.818657,50.877044,351,50.918983,14.872268,5.990265,0.787779
4,5,14.818915,50.875518,351,50.918983,14.872268,6.112232,0.783456
...,...,...,...,...,...,...,...,...
501945,501946,17.793009,51.190281,583,51.197999,17.389951,28.099800,0.004196
501946,501947,17.792567,51.189210,583,51.197999,17.389951,28.073202,0.005139
501947,501948,17.794790,51.193547,583,51.197999,17.389951,28.214147,0.000143
501948,501949,17.794751,51.192137,583,51.197999,17.389951,28.215034,0.000112


In [35]:
calculated_grid_station_centroid.rename(columns={'nearest_plant_id':'nearest_station_id', 'nearest_plant_lat':'nearest_station_lat', 'nearest_plat_lon':'nearest_station_lat'},inplace=True)

In [36]:
path_2 = '../output/calculate_grid_station_centroids.gpkg'
convert_geojson(calculated_grid_station_centroid,path_2)