In [None]:


from this import d
import pandas as pd
import duckdb

# THIS WILL BE CALLED WHEN GRID_MODEL IS ACTIVE
# For solar and onshore wind this cell builds, per REZoning grid cell of `input_iso`:
#   df_<tech>_fi_t     - one generation process per grid cell (capacity bound in GW, fixed availability factor)
#   df_<tech>_fi_p     - process declarations for those generation processes
#   df_agg_<tech>_fi_t - one 'distr_' process per grid cell, feeding the buses mapped to that cell
#   df_agg_<tech>_fi_p - process declarations for the 'distr_' processes
# and finally df_fi_comm_sol_win, the commodity declarations for all grid-cell commodities.

input_iso = 'CHE'


def _best_row_per_grid_cell(df_rezoning, iso):
    """Filter REZoning rows to `iso` and keep one row per grid_cell.

    For each grid_cell the row with the largest 'Installed Capacity Potential (MW)'
    is kept; ties are broken by the larger 'Capacity Factor'.
    """
    df_iso = df_rezoning[df_rezoning['ISO'] == iso]
    # Sort so that the preferred row of every grid_cell comes first ...
    df_iso = df_iso.sort_values(
        ['grid_cell', 'Installed Capacity Potential (MW)', 'Capacity Factor'],
        ascending=[True, False, False],
    )
    # ... then keep only that first row.
    return df_iso.drop_duplicates(subset=['grid_cell'], keep='first')


def _build_grid_cell_tables(df_cells, cells_view, fi_t_view, agg_view, tech_code, tech_label):
    """Build the four grid-cell tables for one technology.

    Args:
        df_cells: one REZoning row per grid cell (output of _best_row_per_grid_cell).
        cells_view: DuckDB view name under which df_cells is registered.
        fi_t_view: DuckDB view name under which the generation table is registered.
        agg_view: DuckDB view name under which the 'distr_' table is registered.
        tech_code: short code used in process/commodity names (e.g. 'sol', 'win').
        tech_label: word used in the descriptions (e.g. 'solar', 'wind').

    Returns:
        (fi_t, fi_p, agg_fi_t, agg_fi_p) as pandas DataFrames.

    Requires the view df_rez_grid_to_bus to be registered beforehand.
    """
    duckdb.register(cells_view, df_cells)

    # One generation process and one output commodity per grid cell; MW -> GW.
    fi_t = duckdb.sql(f"""
    SELECT
        grid_cell AS "grid_cell",
        'e_{tech_code}-' || ISO || '_' || LPAD(CAST(id AS VARCHAR), 4, '0') AS "process",
        'elc_{tech_code}-' || ISO || '_' || LPAD(CAST(id AS VARCHAR), 4, '0') AS "comm-out",
        "Installed Capacity Potential (MW)"/1000 AS "cap_bnd",
        "Capacity Factor" AS "af~fx",
        FROM {cells_view}
    """).to_df()
    duckdb.register(fi_t_view, fi_t)

    # Declarations of the generation processes.
    fi_p = duckdb.sql(f"""
    select
    'ele' AS set,
    process,'{tech_label} resource in grid cell ' || grid_cell AS description,
    'GW' AS capacity_unit,
    'TWh' AS activity_unit,
    'annual' AS timeslicelevel,
    'no' AS vintage
    from {fi_t_view} T1
    """).to_df()

    # One 'distr_' process per grid cell: input is the grid-cell commodity, output is the
    # comma-separated list of bus commodities mapped to that cell. Grid cells without a
    # bus mapping are dropped by the inner join.
    agg_fi_t = duckdb.sql(f"""
    SELECT
        'distr_' || "comm-out" AS process,
        "comm-out" AS "comm-in",group_concat('e_' || bus_id) AS "comm-out",
        1 AS efficiency,
        T1.grid_cell,
    FROM {fi_t_view} T1
    INNER JOIN df_rez_grid_to_bus T2
    ON T1.grid_cell = T2.grid_cell
    group by "comm-out",T1.grid_cell
    """).to_df()
    duckdb.register(agg_view, agg_fi_t)

    # Declarations of the 'distr_' processes.
    agg_fi_p = duckdb.sql(f"""
    select
    'pre' AS set,
    process,'connecting {tech_label} to buses in grid cell ' || grid_cell AS description,
    'GW' AS capacity_unit,
    'TWh' AS activity_unit,
    "comm-in" AS primarycg,
    'daynite' AS timeslicelevel,
    'no' AS vintage
    from {agg_view} T1
    """).to_df()

    return fi_t, fi_p, agg_fi_t, agg_fi_p


# Grid-cell -> bus mapping; bus ids are shortened ('way/' -> 'w', 'relation/' -> 'r').
df_rez_grid_to_bus = pd.read_csv(f"output/{input_iso}/{input_iso}_zone_bus_mapping.csv")
df_rez_grid_to_bus['bus_id'] = df_rez_grid_to_bus['bus_id'].str.replace('way/', 'w', regex=False).str.replace('relation/', 'r', regex=False)
duckdb.register('df_rez_grid_to_bus', df_rez_grid_to_bus)

# --- solar ---
# Simple read - this should come from the cache
df_solar_rezoning = pd.read_csv("../data/REZoning/REZoning_Solar.csv")
df_solar = _best_row_per_grid_cell(df_solar_rezoning, input_iso)

df_solar_fi_t, df_solar_fi_p, df_agg_sol_fi_t, df_agg_sol_fi_p = _build_grid_cell_tables(
    df_solar, 'df_solar', 'df_solar_fi_t', 'df_agg_sol_fi_t',
    tech_code='sol', tech_label='solar',
)

display(df_solar_fi_t)
display(df_solar_fi_p)
display(df_agg_sol_fi_t)
display(df_agg_sol_fi_p)

# --- wind ---
df_wind = _best_row_per_grid_cell(pd.read_csv("../data/REZoning/REZoning_WindOnshore.csv"), input_iso)

df_wind_fi_t, df_wind_fi_p, df_agg_win_fi_t, df_agg_win_fi_p = _build_grid_cell_tables(
    df_wind, 'df_wind', 'df_wind_fi_t', 'df_agg_win_fi_t',
    tech_code='win', tech_label='wind',
)

display(df_wind_fi_t)
display(df_wind_fi_p)
display(df_agg_win_fi_t)
display(df_agg_win_fi_p)

# --- commodity declarations for every solar and wind grid-cell commodity ---
df_fi_comm_sol_win = duckdb.sql("""
    select 'NRG' AS "set","comm-out" as commodity, 'solar generation in grid cell -- ' || grid_cell as "description"
        ,'ELC' as commoditytype, 'daynite' as "timeslicelevel", 'TWh' as unit
        from df_solar_fi_t
        UNION
        select 'NRG' AS "set","comm-out" as commodity, 'wind generation in grid cell -- ' || grid_cell as "description"
        ,'ELC' as commoditytype, 'daynite' as "timeslicelevel", 'TWh' as unit
        from df_wind_fi_t
        order by "comm-out"
""").to_df()

display(df_fi_comm_sol_win)

In [None]:
import dis
from this import d
import pandas as pd
import duckdb

def bus_id_to_commodity(bus_id: str, add_prefix: bool = True) -> str:
    """
    Convert a bus identifier into the commodity name used by the model.

    The 'way/' prefix is shortened to 'w' and 'relation/' to 'r'; when
    requested, the result is additionally prefixed with 'e_'.

    Args:
        bus_id: Bus identifier (e.g., "way/12345", "relation/67890")
        add_prefix: When True (default) prepend "e_" to the cleaned id

    Returns:
        The cleaned identifier, with or without the "e_" prefix

    Raises:
        ValueError: if bus_id is not a string

    Examples:
        >>> bus_id_to_commodity("way/12345")
        'e_w12345'
        >>> bus_id_to_commodity("relation/67890", add_prefix=False)
        'r67890'
    """
    if isinstance(bus_id, str):
        # Shorten the two known prefixes, in the same order as before.
        shortened = bus_id.replace('way/', 'w')
        shortened = shortened.replace('relation/', 'r')
        return "e_" + shortened if add_prefix else shortened

    raise ValueError(f"Bus ID must be string, got {type(bus_id)}")

# THIS WILL BE CALLED WHEN GRID_MODEL IS ACTIVE
# Builds three tables from the clustered-grid outputs of `input_iso`:
#   df_elc_demand_shares - one 'flo_shar' row per bus for the process 'elc_demand'
#   df_elc_demand_topins - a single row listing every bus commodity as an input of 'elc_demand'
#   df_grids_parameters  - one 'g_<comm1>-<comm2>' row per group of clustered lines

input_iso = 'CHE'


# --- bus load shares -> demand share / topology tables ---
bus_load_share_csv = f"1_grids/output/{input_iso}/{input_iso}_bus_load_share.csv"
df_bus_load_share = pd.read_csv(bus_load_share_csv)

# Bus ids become model commodities ('e_' prefix included).
df_bus_load_share['bus_id'] = df_bus_load_share['bus_id'].apply(bus_id_to_commodity, add_prefix=True)

duckdb.register('df_bus_load_share', df_bus_load_share)

# Lower bound on each bus's share: load_share scaled by .99 in column "2022", constant 3 in column "0".
elc_demand_shares_sql = """
    select 
    'flo_shar' as attribute, 
    'elc_demand' as process, 
    bus_id as commodity, load_share * .99 as "2022",3 AS "0",'lo' as lim_type,
    from df_bus_load_share
"""
df_elc_demand_shares = duckdb.sql(elc_demand_shares_sql).to_df()

# display(df_elc_demand_shares)


# All bus commodities concatenated into one comma-separated input list.
elc_demand_topins_sql = """
    select 
    'elc_demand' as process, 
    group_concat(bus_id) as commodity,
    'in' AS "io",
    from df_bus_load_share
"""
df_elc_demand_topins = duckdb.sql(elc_demand_topins_sql).to_df()

# display(df_elc_demand_topins)


# --- clustered lines -> grid process parameters ---
lines_df = pd.read_csv(f"1_grids/output/{input_iso}/{input_iso}_clustered_lines.csv")

# Both line endpoints are converted to commodity ids without the 'e_' prefix.
for endpoint_col, comm_col in (('bus0', 'comm1'), ('bus1', 'comm2')):
    lines_df[comm_col] = lines_df[endpoint_col].apply(bus_id_to_commodity, add_prefix=False)

# display(lines_df)

duckdb.register('lines_df', lines_df)

# Per line group: longest length (divided by 1000, rounded) and summed s_nom (divided by 1000),
# from which cost and efficiency are derived as fixed multiples of the length.
grids_parameters_sql = """
with lines as (
        SELECT comm1, comm2, type, bus0, bus1,
               round(max(length)/1000, 0) as length_km,
               sum(s_nom)/1000 as gw
        FROM lines_df
        GROUP BY comm1, comm2, type, bus0, bus1
        )
      select 'g_' || comm1 || '-' || comm2 AS process, gw AS pasti,
        1.1 * length_km as ncap_cost,
        0.00006 * length_km as efficiency
        from lines
        order by process
    """
df_grids_parameters = duckdb.sql(grids_parameters_sql).to_df()

display(df_grids_parameters)

In [None]:
# THIS WILL BE CALLED WHEN GRID_MODEL IS NOT ACTIVE
# Without the grid model, solar and onshore-wind resources are not tied to grid cells or buses.
# REZoning grid cells are instead grouped by capacity-factor class (capacity factor rounded to
# 2 decimals) and, within each class, split into 5 LCOE classes.
# One generation process is created per (capacity-factor class, LCOE class) pair,
# followed by the process declarations and the commodity declarations.

from this import d
import pandas as pd
import duckdb


input_iso = 'IND'


def _best_row_per_grid_cell(df_rezoning, iso):
    """Filter REZoning rows to `iso` and keep one row per grid_cell.

    For each grid_cell the row with the largest 'Installed Capacity Potential (MW)'
    is kept; ties are broken by the larger 'Capacity Factor'.
    """
    df_iso = df_rezoning[df_rezoning['ISO'] == iso]
    df_iso = df_iso.sort_values(
        ['grid_cell', 'Installed Capacity Potential (MW)', 'Capacity Factor'],
        ascending=[True, False, False],
    )
    return df_iso.drop_duplicates(subset=['grid_cell'], keep='first')


def _build_cf_class_tables(df_cells, cells_view, fi_t_view, tech_code, cost_tech, tech_label):
    """Build the class-level process table and its declarations for one technology.

    Args:
        df_cells: one REZoning row per grid cell (output of _best_row_per_grid_cell).
        cells_view: DuckDB view name under which df_cells is registered.
        fi_t_view: DuckDB view name under which the resulting process table is registered.
        tech_code: short code used in process/commodity names (e.g. 'won', 'spv').
        cost_tech: value of df_costs.tech selecting the cost row to join (e.g. 'windons').
        tech_label: word used in the descriptions (e.g. 'wind', 'solar').

    Returns:
        (fi_t, fi_p) as pandas DataFrames.

    Requires the view df_costs to be registered beforehand.
    """
    duckdb.register(cells_view, df_cells)

    # Grid cells are partitioned by capacity factor rounded to 2 decimals and, inside each
    # partition, split into 5 tiles ordered by non_gen_lcoe. Capacity is summed per class;
    # availability factor and cost are capacity-weighted averages.
    # NOTE(review): .1102, 8.76 and 4 are kept exactly as in the original queries; their
    # meaning (presumably annualisation factor, hours/1000 and a fixed deduction) is not
    # documented here -- confirm before changing.
    fi_t = duckdb.sql(f"""
    WITH cells_with_lcoe_class AS (
        SELECT
            *,"LCOE (USD/MWh)" - (T2.invcost*.1102 + T2.fixom)/8.76/"capacity factor" - 4 AS non_gen_lcoe,
            NTILE(5) OVER (
                PARTITION BY round("Capacity Factor", 2)
                ORDER BY "LCOE (USD/MWh)" - (T2.invcost*.1102 + T2.fixom)/8.76/"capacity factor" - 4
            ) AS lcoe_class
        FROM {cells_view} T1
        inner join df_costs T2
        on T1.ISO = T2.iso AND T2.tech = '{cost_tech}'
    )
    SELECT
        'e_{tech_code}-' || ISO || '_' || cast(round("capacity factor",2)*100 as int) || '_c' || lcoe_class AS "process",
        'elc_{tech_code}-' || ISO AS "comm-out",
        SUM("Installed Capacity Potential (MW)")/1000 AS "cap_bnd",
        SUM("Capacity Factor" * "Installed Capacity Potential (MW)")/SUM("Installed Capacity Potential (MW)") AS "af~fx",
        SUM(non_gen_lcoe * "Installed Capacity Potential (MW)")/SUM("Installed Capacity Potential (MW)") / .1102 AS "ncap_cost~USD21_alt",
        lcoe_class
    FROM cells_with_lcoe_class
    GROUP BY ISO, round("capacity factor",2), lcoe_class
    order by "af~fx" desc

    """).to_df()
    duckdb.register(fi_t_view, fi_t)

    # Declarations are derived from the table built just above. The wind branch used to
    # read df_wind_fi_t here, a view this cell never creates.
    # NOTE(review): instr(process, '_') matches the underscore of the leading 'e_', so the
    # "CF class" text also contains the tech code and ISO (e.g. 'won-IND_25') -- confirm
    # that this is the intended description.
    fi_p = duckdb.sql(f"""
    select
    'ele' AS set,
    process,
    '{tech_label} resource -- CF class ' || substr(process, instr(process, '_')+1, instr(process, '_c')-instr(process, '_')-1) ||
    ' -- cost class ' || substr(process, instr(process, '_c')+2) AS description,
    'GW' AS capacity_unit,
    'TWh' AS activity_unit,
    'annual' AS timeslicelevel,
    'no' AS vintage
    from {fi_t_view} T1
    """).to_df()

    return fi_t, fi_p


# Simple read
df_solar = _best_row_per_grid_cell(pd.read_csv("data/REZoning/REZoning_Solar.csv"), input_iso)
df_wind = _best_row_per_grid_cell(pd.read_csv("data/REZoning/REZoning_WindOnshore.csv"), input_iso)

df_costs = pd.read_csv("data/REZoning/REZoning_costs_per_kw.csv")
df_costs = df_costs[df_costs['iso'] == input_iso]
# Registered explicitly so the queries do not depend on DuckDB picking up the Python variable.
duckdb.register('df_costs', df_costs)

# --- onshore wind ---
df_won_fi_t, df_won_fi_p = _build_cf_class_tables(
    df_wind, 'df_wind', 'df_won_fi_t',
    tech_code='won', cost_tech='windons', tech_label='wind',
)

display(df_won_fi_p)

# --- solar PV ---
df_solar_fi_t, df_solar_fi_p = _build_cf_class_tables(
    df_solar, 'df_solar', 'df_solar_fi_t',
    tech_code='spv', cost_tech='solarpv', tech_label='solar',
)

display(df_solar_fi_p)

# --- commodity declarations (UNION removes the repeated rows) ---
df_fi_comm_sol_win = duckdb.sql("""
    select 'NRG' AS "set","comm-out" as commodity, 'solar generation'  as "description"
        ,'ELC' as commoditytype, 'daynite' as "timeslicelevel", 'TWh' as unit
        from df_solar_fi_t
        UNION
        select 'NRG' AS "set","comm-out" as commodity, 'wind generation' as "description"
        ,'ELC' as commoditytype, 'daynite' as "timeslicelevel", 'TWh' as unit
        from df_won_fi_t
        order by "comm-out"
""").to_df()

display(df_fi_comm_sol_win)









In [None]:
import pandas as pd
import duckdb


input_iso = 'CHE'

# Share of the total installed-capacity potential that a shortlist must cover.
# NOTE(review): the earlier comments said 95% while the code used 0.97; the coded value is kept.
CAPACITY_SHARE_THRESHOLD = 0.97

# A wind cell counts as "high LCOE" above mean + this many standard deviations.
# NOTE(review): the earlier comment said 2 while the code used 1; the coded value is kept.
HIGH_LCOE_STD_MULTIPLIER = 1


def _shortlist_by_capacity_share(df, share):
    """Rank rows by capacity potential and shortlist the largest ones covering `share`.

    Args:
        df: frame with an 'Installed Capacity Potential (MW)' column.
        share: fraction (0-1) of the total capacity the shortlist must reach.

    Returns:
        (ranked, shortlist): `ranked` is `df` sorted by descending capacity with the added
        columns 'cumulative_cap' and 'cumulative_pct'; `shortlist` holds its leading rows
        up to and including the first row whose cumulative share reaches `share`.
    """
    capacity_col = 'Installed Capacity Potential (MW)'
    ranked = df.sort_values(by=capacity_col, ascending=False)
    ranked['cumulative_cap'] = ranked[capacity_col].cumsum()
    ranked['cumulative_pct'] = ranked['cumulative_cap'] / ranked[capacity_col].sum()

    # A row belongs to the shortlist while the share accumulated *before* it is still below
    # the target. This is positional, so it is correct for the sorted (non-monotonic) index;
    # the previous `index[-1] + 1` lookup treated row labels as positions.
    share_before_row = ranked['cumulative_pct'].shift(1, fill_value=0.0)
    return ranked, ranked[share_before_row < share]


# Simple read - this should come from the cache
df_solar_rezoning = pd.read_csv("data/REZoning/REZoning_Solar.csv")

df_solar = df_solar_rezoning[df_solar_rezoning['ISO'] == input_iso]
# One row per grid_cell; the first row in file order is kept.
df_solar = df_solar.drop_duplicates(subset='grid_cell', keep='first')

# display(df_solar)

# Shortlist the solar cells that capture CAPACITY_SHARE_THRESHOLD of the capacity potential
df_solar_sorted, df_solar_95 = _shortlist_by_capacity_share(df_solar, CAPACITY_SHARE_THRESHOLD)

# display(df_solar_95)

df_wind = pd.read_csv("data/REZoning/REZoning_WindOnshore.csv")
df_wind = df_wind[df_wind['ISO'] == input_iso]
df_wind = df_wind.drop_duplicates(subset='grid_cell', keep='first')

# Same shortlist for wind
df_wind_sorted, df_wind_95 = _shortlist_by_capacity_share(df_wind, CAPACITY_SHARE_THRESHOLD)

# display(df_wind_95)

# Identify wind records whose LCOE exceeds mean + HIGH_LCOE_STD_MULTIPLIER standard deviations
lcoe_mean = df_wind['LCOE (USD/MWh)'].mean()
lcoe_std = df_wind['LCOE (USD/MWh)'].std()
high_lcoe_threshold = lcoe_mean + HIGH_LCOE_STD_MULTIPLIER * lcoe_std

df_wind_high_lcoe = df_wind[df_wind['LCOE (USD/MWh)'] > high_lcoe_threshold]

display(df_wind_high_lcoe)




In [None]:

import pandas as pd

# Country selector used throughout the notebook; nothing else in this cell reads it.
input_iso = 'CHE'


# Load REF_NTC data for function development
def load_ref_ntc_data(file_path="data/ember/europe_interconnection_data/Interconnectors/REF_NTC.csv"):
    """Read the REF_NTC interconnector CSV into a DataFrame.

    The file is returned exactly as pandas parses it; no further processing is applied.

    Args:
        file_path: location of the CSV. Defaults to the REF_NTC file under data/ember.

    Returns:
        pandas.DataFrame with the contents of the CSV.
    """
    return pd.read_csv(file_path)

# Read the REF_NTC interconnector table into df_ntc using the loader defined above.
df_ntc = load_ref_ntc_data()




In [None]:
import pandas as pd
from shared_data_loader import get_shared_loader
import duckdb

# bus_id_to_commodity is called below; import it here, as the later cells do, so this cell
# does not depend on a definition left over from another cell.
from spatial_utils import bus_id_to_commodity

input_iso = 'CHE'


# Per-bus load shares (voronoi variant); buses without load are dropped and the
# bus ids are converted to commodity ids without the 'e_' prefix.
df_bus_load_share = pd.read_csv(f"1_grids/output/{input_iso}/{input_iso}_bus_load_share_voronoi.csv")
df_bus_load_share = df_bus_load_share[df_bus_load_share['load_share'] > 0].copy()
df_bus_load_share['bus_id'] = df_bus_load_share['bus_id'].apply(lambda x: bus_id_to_commodity(x, add_prefix=False))


# Demand technologies come from the 'dem_techs' sheet of the shared mappings.
shared_loader = get_shared_loader("data/")
df_dem_techs = shared_loader.get_vs_mappings_sheet('dem_techs')

duckdb.register('df_bus_load_share', df_bus_load_share)
duckdb.register('df_dem_techs', df_dem_techs)


# One process per (demand tech, bus) pair, taking the bus commodity 'e_<bus>' as input.
df_demtech_topins = duckdb.sql("""
    select 
    T2.tech || '_'|| T1.bus_id as process,
    'e_' || T1.bus_id as commodity,
    'IN' as "io",
    from df_bus_load_share T1
    cross join df_dem_techs T2
    """).to_df()



# Same (tech, bus) processes with the bus load share as a lower 'flo_mark' bound
# on the listed demand commodities.
df_demtech_flo_mark = duckdb.sql("""
    select 
    T2.tech || '_'|| T1.bus_id as process,
    'elc_buildings,elc_transport,elc_industry,elc_roadtransport' as commodity,
    T1.load_share as "flo_mark",
    'lo' as lim_type,
    from df_bus_load_share T1
    cross join df_dem_techs T2
    """).to_df()

display(df_demtech_flo_mark)

In [None]:
import pandas as pd
from spatial_utils import bus_id_to_commodity

input_iso = 'CHE'


buses_df = pd.read_csv(f"1_grids/output/{input_iso}/{input_iso}_clustered_buses.csv")

# Only tags that occur on more than one bus row can need a step-down link.
tag_counts = buses_df['tags'].value_counts()
multi_tag = tag_counts[tag_counts > 1].index
df_multi = buses_df[buses_df['tags'].isin(multi_tag)].copy()

# Voltage as a number; values that cannot be parsed become NaN and are dropped here,
# because int(NaN) would otherwise raise when the process name is built.
df_multi['voltage_num'] = pd.to_numeric(df_multi['voltage'], errors='coerce')
df_multi = df_multi.dropna(subset=['voltage_num'])
df_multi = df_multi.sort_values(['tags', 'voltage_num'], ascending=[True, False])

# For each tag, link every bus to the next-lower-voltage bus of the same tag.
stepdown_records = []
for tag, group in df_multi.groupby('tags'):
    group_sorted = group.sort_values('voltage_num', ascending=False)
    bus_ids = group_sorted['bus_id'].tolist()
    voltages = group_sorted['voltage_num'].tolist()
    tag_label = bus_id_to_commodity(tag, add_prefix=False)
    for i in range(len(bus_ids) - 1):
        higher_v = voltages[i]
        lower_v = voltages[i + 1]
        stepdown_records.append({
            'process': f"stepdown_{tag_label}_{int(higher_v)}to{int(lower_v)}",
            # Bus ids are converted to 'e_'-prefixed commodities.
            'comm-in': bus_id_to_commodity(bus_ids[i], add_prefix=True),
            'comm-out': bus_id_to_commodity(bus_ids[i + 1], add_prefix=True),
            'efficiency': 1
        })

# Columns are given explicitly so the frame keeps its schema when no record was produced.
df_stepdown = pd.DataFrame(stepdown_records, columns=['process', 'comm-in', 'comm-out', 'efficiency'])
display(df_stepdown)


# Process declarations: set, process, description, activity unit, capacity unit, timeslice level
df_stepdown_desc = pd.DataFrame({
    'set': "pre",
    'process': df_stepdown['process'],
    'description': (
        "Step-down transformer: "
        + df_stepdown['comm-in'] + " → " + df_stepdown['comm-out']
    ),
    'activityunit': "TWh",
    'capacityunit': "GW",
    'timeslicelevel': "daynite"
})
display(df_stepdown_desc)










In [None]:
import pandas as pd
from spatial_utils import bus_id_to_commodity

input_iso = 'CHE'


# REZoning tables are read straight from disk (not yet in shared_data_loader):
# solar first, then onshore wind.
df_solar_rezoning, df_wind_rezoning = (
    pd.read_csv(rezoning_csv)
    for rezoning_csv in (
        "data/REZoning/REZoning_Solar.csv",
        "data/REZoning/REZoning_WindOnshore.csv",
    )
)

# Clustered buses of the selected country
clustered_buses_csv = f"1_grids/output/{input_iso}/{input_iso}_clustered_buses.csv"
buses_df = pd.read_csv(clustered_buses_csv)

# Function to strip voltage from bus_id
def strip_voltage_from_bus_id(bus_id):
    """Drop a trailing '-<digits>' voltage suffix (e.g., 'bus123-380' -> 'bus123').

    Non-string values and ids whose last '-' segment is not purely digits
    are returned unchanged.
    """
    if not isinstance(bus_id, str):
        return bus_id
    # Split once at the last '-': head is everything before it, tail the candidate voltage.
    head, dash, tail = bus_id.rpartition('-')
    if dash and tail.isdigit():
        return head
    return bus_id

# Apply voltage stripping to bus_id column
buses_df['clean_bus_id'] = buses_df['bus_id'].apply(strip_voltage_from_bus_id)

# Get distinct clean_bus_id with coordinates (keep first occurrence for each clean_bus_id)
buses_distinct = buses_df.drop_duplicates(subset=['clean_bus_id'], keep='first')

# Set name of every distinct bus: 'p_' + commodity id without the 'e_' prefix.
bus_set_names = buses_distinct['clean_bus_id'].apply(lambda x: f"p_{bus_id_to_commodity(x, add_prefix=False)}")

# write this df on a new sheet called "geolocation" in syssettings. veda marker (~geolocation)

# Create geolocation DataFrame from buses.
# x holds the longitude and y the latitude, so lat is read from y and lng from x
# (they were previously assigned the other way round).
# NOTE(review): assumes clustered_buses.csv keeps the PyPSA bus convention
# (x = longitude, y = latitude) -- confirm against the file.
df_geolocation_buses = pd.DataFrame({
    'grid_node': bus_set_names,
    'lat': buses_distinct['y'],
    'lng': buses_distinct['x']
})

# Add REZoning solar grid cells to geolocation
# NOTE(review): only the solar REZoning cells are added; df_wind_rezoning is not used here.
df_solar_iso = df_solar_rezoning[df_solar_rezoning['ISO'] == input_iso]
rez_set_names = df_solar_iso['grid_cell'].apply(lambda x: f"rez_{x}")
df_geolocation_solar = pd.DataFrame({
    'grid_node': rez_set_names,
    'lat': df_solar_iso['lat'],
    'lng': df_solar_iso['long']
})

# Combine both geolocation sources
df_geolocation = pd.concat([df_geolocation_buses, df_geolocation_solar], ignore_index=True)

# Remove duplicates based on grid_node (keep first occurrence)
df_geolocation = df_geolocation.drop_duplicates(subset=['grid_node'], keep='first')


display(df_geolocation)

# write this df on a new sheet called "geo_sets" in Sets-vervestacks (veda marker (~tfm_pset))
# Bus sets: every commodity starting with the bus's 'e_'-prefixed id.
df_set_psetco = pd.DataFrame({
    'setname': bus_set_names,
    'pset_co': buses_distinct['clean_bus_id'].apply(lambda x: f"{bus_id_to_commodity(x, add_prefix=True)}*")
})


# Grid-cell sets: commodities matching 'elc*<ISO>_<id padded to 4 digits>'.
df_solar_set_psetco = pd.DataFrame({
    'setname': rez_set_names,
    'pset_co': df_solar_iso.apply(lambda row: f"elc*{row['ISO']}_{str(row['id']).zfill(4)}", axis=1)
})

df_set_psetco = pd.concat([df_set_psetco, df_solar_set_psetco], ignore_index=True)

display(df_set_psetco)

