In [None]:
import pandas as pd
import os

# Try using openpyxl as an engine
xls = pd.ExcelFile('coal_supply_chain_india.xlsx', engine='openpyxl')
all_sheets = {sheet_name: pd.read_excel(xls, sheet_name=sheet_name) for sheet_name in xls.sheet_names}

In [2]:
assets = all_sheets['assets']
ownership = all_sheets['ownership']
flood_exposure = all_sheets['flood exposure']
firms = all_sheets['firms']
supply_chain = all_sheets['supply chain']

In [3]:
# Subtract 1 from firm_id to make it start from 0
firms['firm_id'] = firms['firm_id'] - 1


In [4]:
firms

Unnamed: 0,firm_id,name,final_demand,discount_rate,unit_price,margin,leverage_ratio,type
0,0,-,100,0.05,20,0.02,0.02,production
1,1,24R Advisory Services Pvt,25,0.05,20,0.02,0.02,production
2,2,ACB (India),50,0.05,20,0.02,0.02,production
3,3,AM Mining India Pvt,25,0.05,20,0.02,0.02,production
4,4,AM/NS,100,0.05,20,0.02,0.02,production
...,...,...,...,...,...,...,...,...
159,159,Wilmar International,50,0.05,20,0.02,0.02,production
160,160,Yes Bank,25,0.05,20,0.02,0.02,production
161,161,natural person(s),50,0.05,20,0.02,0.02,production
162,162,small shareholder(s),100,0.05,20,0.02,0.02,production


In [5]:
# Create a mapping from asset_id to name
id_to_name = assets.set_index('asset_id')['name']

# Map the name column
ownership['name'] = ownership['asset_id'].map(id_to_name)

ownership

Unnamed: 0,asset_id,share,firm_id,name
0,358,1.0,11,Adani Raigarh Thermal Power Plant
1,359,1.0,59,Aditya Aluminium power station
2,360,1.0,59,Aditya Aluminium power station
3,361,1.0,59,Aditya Aluminium power station
4,362,1.0,59,Aditya Aluminium power station
...,...,...,...,...
1386,1206,1.0,48,Bedi Port Coal Terminal
1387,1212,1.0,49,Haldia Port Coal Terminal
1388,1219,1.0,1,Karaikal Port Coal Terminal
1389,1207,1.0,35,Cochin Port Coal Terminal


In [6]:
ownership['firm_id'] = ownership['firm_id'] - 1
ownership

Unnamed: 0,asset_id,share,firm_id,name
0,358,1.0,10,Adani Raigarh Thermal Power Plant
1,359,1.0,58,Aditya Aluminium power station
2,360,1.0,58,Aditya Aluminium power station
3,361,1.0,58,Aditya Aluminium power station
4,362,1.0,58,Aditya Aluminium power station
...,...,...,...,...
1386,1206,1.0,47,Bedi Port Coal Terminal
1387,1212,1.0,48,Haldia Port Coal Terminal
1388,1219,1.0,0,Karaikal Port Coal Terminal
1389,1207,1.0,34,Cochin Port Coal Terminal


In [7]:
supply_chain['supplier'] = supply_chain['supplier'] - 1
supply_chain['client'] = supply_chain['client'] - 1
supply_chain

Unnamed: 0,supplier,client,recipe,product
0,26,7,0.070721,subbituminous
1,26,87,0.374552,unknown
2,26,140,0.949854,unknown
3,138,7,0.389144,subbituminous
4,138,87,0.031325,unknown
...,...,...,...,...
288,103,10,0.816487,bituminous
289,127,10,0.888567,bituminous
290,51,10,0.459195,bituminous
291,142,10,0.169498,bituminous


In [8]:
# use asset level instead of unit level
assets_grouped = assets.groupby('name').agg({
    'asset_id': 'first',
    'latitude': 'first',
    'longitude': 'first',
    'country': 'first',
    'product': 'first',
    'replacement_value': 'sum',
    'production': 'sum'
}).reset_index()

In [9]:
assets_grouped

Unnamed: 0,name,asset_id,latitude,longitude,country,product,replacement_value,production
0,AKK Coal Mine,3,23.790500,85.919800,India,power,9.309546e+06,5.350
1,ARS Metals Gummidipoondi captive power station,409,13.423889,80.066111,India,power,5.742337e+07,33.000
2,ASP Coal Mine,329,23.741330,86.405159,India,power,7.151819e+05,0.411
3,Adani Godda power station,356,24.814107,87.135465,India,power,1.531290e+09,880.000
4,Adani Raigarh Thermal Power Plant,358,21.743775,83.274093,India,power,5.742337e+08,330.000
...,...,...,...,...,...,...,...,...
667,West Bokaro Coal Mine,112,23.792747,85.554231,India,power,1.131066e+07,6.500
668,West Jhagrakhand Coal Mine,194,23.197211,82.161616,India,power,2.088122e+05,0.120
669,Yadadri power station,1201,16.708219,79.581184,India,power,1.531290e+09,880.000
670,Yermarus power station,1203,16.295261,77.356120,India,power,1.531290e+09,880.000


In [10]:
ownership_grouped = ownership.groupby(['name', 'firm_id'], as_index=False).first()


In [11]:
ownership_grouped

Unnamed: 0,name,firm_id,asset_id,share
0,AKK Coal Mine,33,3,1.000
1,ARS Metals Gummidipoondi captive power station,7,409,1.000
2,ASP Coal Mine,33,329,1.000
3,Adani Godda power station,10,356,1.000
4,Adani Raigarh Thermal Power Plant,10,358,1.000
...,...,...,...,...
731,West Jhagrakhand Coal Mine,33,194,1.000
732,Yadadri power station,142,1201,1.000
733,Yermarus power station,22,1203,0.221
734,Yermarus power station,86,1203,0.779


In [12]:
assets_grouped = assets_grouped.drop(columns=['asset_id'])
ownership_grouped = ownership_grouped.drop(columns=['asset_id'])

In [13]:
assets_grouped

Unnamed: 0,name,latitude,longitude,country,product,replacement_value,production
0,AKK Coal Mine,23.790500,85.919800,India,power,9.309546e+06,5.350
1,ARS Metals Gummidipoondi captive power station,13.423889,80.066111,India,power,5.742337e+07,33.000
2,ASP Coal Mine,23.741330,86.405159,India,power,7.151819e+05,0.411
3,Adani Godda power station,24.814107,87.135465,India,power,1.531290e+09,880.000
4,Adani Raigarh Thermal Power Plant,21.743775,83.274093,India,power,5.742337e+08,330.000
...,...,...,...,...,...,...,...
667,West Bokaro Coal Mine,23.792747,85.554231,India,power,1.131066e+07,6.500
668,West Jhagrakhand Coal Mine,23.197211,82.161616,India,power,2.088122e+05,0.120
669,Yadadri power station,16.708219,79.581184,India,power,1.531290e+09,880.000
670,Yermarus power station,16.295261,77.356120,India,power,1.531290e+09,880.000


In [14]:
# 1. Create new numeric asset_id
assets_grouped['asset_id'] = range(len(assets_grouped))

# 2. Move asset_id to the first column
cols = ['asset_id'] + [col for col in assets_grouped.columns if col != 'asset_id']
assets_grouped = assets_grouped[cols]


In [15]:
assets_grouped

Unnamed: 0,asset_id,name,latitude,longitude,country,product,replacement_value,production
0,0,AKK Coal Mine,23.790500,85.919800,India,power,9.309546e+06,5.350
1,1,ARS Metals Gummidipoondi captive power station,13.423889,80.066111,India,power,5.742337e+07,33.000
2,2,ASP Coal Mine,23.741330,86.405159,India,power,7.151819e+05,0.411
3,3,Adani Godda power station,24.814107,87.135465,India,power,1.531290e+09,880.000
4,4,Adani Raigarh Thermal Power Plant,21.743775,83.274093,India,power,5.742337e+08,330.000
...,...,...,...,...,...,...,...,...
667,667,West Bokaro Coal Mine,23.792747,85.554231,India,power,1.131066e+07,6.500
668,668,West Jhagrakhand Coal Mine,23.197211,82.161616,India,power,2.088122e+05,0.120
669,669,Yadadri power station,16.708219,79.581184,India,power,1.531290e+09,880.000
670,670,Yermarus power station,16.295261,77.356120,India,power,1.531290e+09,880.000


In [16]:
ownership_grouped

Unnamed: 0,name,firm_id,share
0,AKK Coal Mine,33,1.000
1,ARS Metals Gummidipoondi captive power station,7,1.000
2,ASP Coal Mine,33,1.000
3,Adani Godda power station,10,1.000
4,Adani Raigarh Thermal Power Plant,10,1.000
...,...,...,...
731,West Jhagrakhand Coal Mine,33,1.000
732,Yadadri power station,142,1.000
733,Yermarus power station,22,0.221
734,Yermarus power station,86,0.779


In [17]:
# Create the mapping from assets_grouped
name_to_asset_id = assets_grouped.set_index('name')['asset_id']

# Map the asset_id in ownership_grouped using the name
ownership_grouped['asset_id'] = ownership_grouped['name'].map(name_to_asset_id)

# Move asset_id to the first column
ownership_grouped = ownership_grouped[['asset_id'] + [col for col in ownership_grouped.columns if col != 'asset_id']]

ownership_grouped = ownership_grouped.drop(columns=['name'])

In [18]:
ownership_grouped

Unnamed: 0,asset_id,firm_id,share
0,0,33,1.000
1,1,7,1.000
2,2,33,1.000
3,3,10,1.000
4,4,10,1.000
...,...,...,...
731,668,33,1.000
732,669,142,1.000
733,670,22,0.221
734,670,86,0.779


In [19]:
flood_exposure = flood_exposure = flood_exposure.head(672)
# Reset asset_id to start from 0 in flood_exposure
flood_exposure['asset_id'] = range(len(flood_exposure))
flood_exposure

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  flood_exposure['asset_id'] = range(len(flood_exposure))


Unnamed: 0,asset_id,5,10,25,50,100,200,500,1000,flood_protection
0,0,9.34,10.15,11.61,13.28,13.57,14.11,14.81,15.19,10.15
1,1,0.43,0.55,0.66,0.72,0.79,0.85,0.91,0.98,0.55
2,2,2.05,2.25,2.47,2.58,2.60,2.61,2.62,2.62,2.25
3,3,2.05,2.25,2.47,2.58,2.60,2.61,2.62,2.62,2.25
4,4,0.20,1.07,2.85,2.86,3.92,4.62,5.26,5.67,1.07
...,...,...,...,...,...,...,...,...,...,...
667,667,9.34,10.15,11.61,13.28,13.57,14.11,14.81,15.19,10.15
668,668,2.05,2.25,2.47,2.58,2.60,2.61,2.62,2.62,2.25
669,669,0.20,1.07,2.85,2.86,3.92,4.62,5.26,5.67,1.07
670,670,2.70,2.70,2.76,3.03,3.34,3.59,3.81,3.96,2.70


In [20]:
# Update the required sheets (assets and ownership)
all_sheets['assets'] = assets_grouped
all_sheets['ownership'] = ownership_grouped
all_sheets['flood exposure'] = flood_exposure
all_sheets['flood exposure'] = flood_exposure
all_sheets['firms'] = firms
all_sheets['supply chain'] = supply_chain

# Write all sheets back to a new Excel file (rename after saving)
with pd.ExcelWriter('coal_supply_chain_india_updated.xlsx', engine='xlsxwriter') as writer:
    for sheet_name, df in all_sheets.items():
        df.to_excel(writer, sheet_name=sheet_name, index=False)