In [18]:
# 1_imports_and_config.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

In [19]:
# Paths 
data_dir = Path(r"..\data\processed")   # directory containing p1.xlsx ... p12.xlsx
out_dir = Path("./outputs")
out_dir.mkdir(exist_ok=True)

# Filenames (1..12)
files = {i: data_dir / f"p{i}.xlsx" for i in range(1,13)}

In [20]:
# 2_load_data.py
# Loads all p1..p12 into a dictionary of DataFrames: dfs[1] -> p1, ..., dfs[12] -> p12
dfs = {}
for i in range(1,13):
    fp = files[i]
    try:
        dfs[i] = pd.read_excel(fp)
        print(f"Loaded p{i} ({fp.name}) — {len(dfs[i])} rows, {len(dfs[i].columns)} cols")
    except Exception as e:
        dfs[i] = None
        print(f"Could not load p{i} ({fp.name}): {e}")


Loaded p1 (p1.xlsx) — 13 rows, 29 cols
Loaded p2 (p2.xlsx) — 13 rows, 6 cols
Loaded p3 (p3.xlsx) — 13 rows, 14 cols
Loaded p4 (p4.xlsx) — 13 rows, 4 cols
Loaded p5 (p5.xlsx) — 13 rows, 29 cols
Loaded p6 (p6.xlsx) — 13 rows, 6 cols
Loaded p7 (p7.xlsx) — 13 rows, 14 cols
Loaded p8 (p8.xlsx) — 13 rows, 4 cols
Loaded p9 (p9.xlsx) — 13 rows, 8 cols
Loaded p10 (p10.xlsx) — 13 rows, 7 cols
Loaded p11 (p11.xlsx) — 13 rows, 7 cols
Loaded p12 (p12.xlsx) — 13 rows, 41 cols


In [21]:
# 3_helpers_and_inspect.py
def normalize_year_col(df, col='End of period'):
    df = df.copy()
    if col not in df.columns:
        raise KeyError(f"Year column '{col}' not found")
    # coerce to numeric, drop missing
    df[col] = pd.to_numeric(df[col], errors='coerce')
    df = df.dropna(subset=[col])
    # convert to int
    df[col] = df[col].astype(int)
    return df

# Normalize key files we'll use (p1, p5, p12 and p3,p8,p9,p10 if present)
for idx in [1,3,5,8,9,10,12]:
    if dfs.get(idx) is not None:
        try:
            dfs[idx] = normalize_year_col(dfs[idx], 'End of period')
        except Exception as e:
            print(f"Warning normalizing p{idx}: {e}")

# Quick peek
for idx in [1,5,12]:
    df = dfs.get(idx)
    if df is not None:
        print(f"p{idx} columns: {df.columns.tolist()[:10]} ...")


p1 columns: ['Start of period', 'End of period', 'GVA at basic prices', 'Taxes on products (including import duties)', 'Less subsidies on products', 'GDP', 'CFC', 'NDP', 'PFCE', 'GFCE'] ...
p5 columns: ['Start of period', 'End of period', 'GVA at basic prices', 'Taxes on products (including import duties)', 'Less subsidies on products', 'GDP', 'CFC', 'NDP', 'PFCE', 'GFCE'] ...
p12 columns: ['Start of period', 'End of period', 'PI - GVA at basic prices', 'PI - agriculture, forestry & fishing', ' PI - mining & quarrying', 'PI - manufacturing', 'PI - electricity, gas, water supply & other utility services', 'PI - construction', 'PI - trade, repair, hotels & restaurants', 'PI - transport, storage, communication & services related to broadcasting'] ...


In [25]:
# 4_merge_gdp.py
p1 = dfs[1]  # current prices
p5 = dfs[5]  # constant prices

# Ensure both present
if p1 is None or p5 is None:
    raise SystemExit("p1 or p5 missing — cannot proceed")

# Select core columns (if present)
p1_sel = p1[['End of period', 'GDP']].rename(columns={'End of period': 'Year', 'GDP': 'Nominal_GDP'})
p5_sel = p5[['End of period', 'GDP']].rename(columns={'End of period': 'Year', 'GDP': 'Real_GDP'})

# If PFCE/GFCE/GFCF exist in p1, keep them
for col in ['PFCE', 'GFCE', 'GFCF', 'GCF', 'Export of goods and services', 'Less imports of goods and services']:
    if col in p1.columns:
        p1_sel[col] = p1[col]

# Merge
gdp = pd.merge(p1_sel, p5_sel, on='Year', how='inner', validate='1:1').sort_values('Year').reset_index(drop=True)
gdp.head()


Unnamed: 0,Year,Nominal_GDP,PFCE,GFCE,GFCF,GCF,Export of goods and services,Less imports of goods and services,Real_GDP
0,2012,8736329.0,4910447.0,968375.0,2997733.0,3403008.0,2143931.0,2715554.0,8736329.0
1,2013,9944013.0,5614484.0,1062404.0,3324973.0,3847122.0,2439707.0,3108428.0,9213017.0
2,2014,11233520.0,6475649.0,1156509.0,3515621.0,3794135.0,2856781.0,3191811.0,9801370.0
3,2015,12467960.0,7247340.0,1301762.0,3750392.0,4179779.0,2863636.0,3235962.0,10527670.0
4,2016,13771870.0,8126408.0,1436171.0,3957092.0,4422659.0,2728647.0,3044923.0,11369490.0


In [7]:
gdp = pd.merge(p1, p5, on='End of period', how='inner')
gdp = gdp.rename(columns={'End of period': 'Year'})
gdp.head()

Unnamed: 0,Year,Nominal_GDP,Real_GDP
0,2012,8736329.0,8736329.0
1,2013,9944013.0,9213017.0
2,2014,11233520.0,9801370.0
3,2015,12467960.0,10527670.0
4,2016,13771870.0,11369490.0


In [27]:
# 5_compute_metrics.py
gdp = gdp.copy()

# Deflator (index)
gdp['Deflator'] = (gdp['Nominal_GDP'] / gdp['Real_GDP']) * 100.0

# YoY growths
gdp['Nominal_Growth_%'] = gdp['Nominal_GDP'].pct_change() * 100
gdp['Real_Growth_%'] = gdp['Real_GDP'].pct_change() * 100

# Growth gap
gdp['Growth_Gap_%'] = gdp['Nominal_Growth_%'] - gdp['Real_Growth_%']

# Expenditure shares (if present) — percent of Nominal GDP
for col in ['PFCE', 'GFCE', 'GFCF', 'GCF', 'Export of goods and services', 'Less imports of goods and services']:
    if col in gdp.columns:
        gdp[f"{col}_pctGDP"] = (gdp[col] / gdp['Nominal_GDP']) * 100

# Net exports if exports & imports available
if ('Export of goods and services' in gdp.columns) and ('Less imports of goods and services' in gdp.columns):
    gdp['Net_Exports'] = gdp['Export of goods and services'] - gdp['Less imports of goods and services']
    gdp['Net_Exports_pctGDP'] = (gdp['Net_Exports'] / gdp['Nominal_GDP']) * 100

gdp[['Year','Nominal_GDP','Real_GDP','Deflator','Nominal_Growth_%','Real_Growth_%','Growth_Gap_%']].tail()


Unnamed: 0,Year,Nominal_GDP,Real_GDP,Deflator,Nominal_Growth_%,Real_Growth_%,Growth_Gap_%
8,2020,20103590.0,14534640.0,138.315031,6.370082,3.871437,2.498645
9,2021,19854100.0,13694870.0,144.974703,-1.241056,-5.777725,4.536669
10,2022,23597400.0,15021850.0,157.087206,18.854057,9.689592,9.164464
11,2023,26890470.0,16164910.0,166.350861,13.955242,7.609365,6.345877
12,2024,30122960.0,17650590.0,170.662591,12.020924,9.190755,2.830169


In [28]:
# 6_extract_inflation_p12.py
p12 = dfs[12]
if p12 is None:
    print("p12 missing — CPI comparisons will be skipped")
else:
    # Try to find CPI Combined column (robust search)
    cpi_candidates = [c for c in p12.columns if 'CPI' in c or 'Consumer prices' in c or 'CPI (Combined)' in c]
    cpi_col = None
    preferred = ['Consumer prices - CPI (Combined)', 'CPI (Combined)', 'Consumer prices - CPI Combined', 'CPI (Combined)']
    for name in preferred:
        if name in p12.columns:
            cpi_col = name
            break
    if cpi_col is None and cpi_candidates:
        cpi_col = cpi_candidates[0]

    if cpi_col:
        p12_sub = p12[['End of period', cpi_col]].rename(columns={'End of period':'Year', cpi_col: 'CPI_Combined'})
        p12_sub['Year'] = p12_sub['Year'].astype(int)
        # Merge
        gdp = pd.merge(gdp, p12_sub, on='Year', how='left')
        gdp['CPI_YoY_%'] = gdp['CPI_Combined'].pct_change() * 100
        print("Merged CPI column:", cpi_col)
    else:
        print("CPI column not found automatically in p12.")


Merged CPI column: Consumer prices - CPI (Combined)


In [29]:
# 7_plots_and_save.py
def savefig(fig, fname):
    path = out_dir / fname
    fig.tight_layout()
    fig.savefig(path, dpi=200)
    print("Saved:", path)

# 7.1 Nominal vs Real GDP (levels)
fig, ax = plt.subplots(figsize=(10,5))
ax.plot(gdp['Year'], gdp['Nominal_GDP'], marker='o', label='Nominal GDP')
ax.plot(gdp['Year'], gdp['Real_GDP'], marker='o', label='Real GDP')
ax.set_title('Nominal vs Real GDP (₹ crore)')
ax.set_xlabel('Year'); ax.set_ylabel('GDP (₹ crore)')
ax.legend()
savefig(fig, "nominal_vs_real_gdp.png")
plt.close(fig)

# 7.2 Growth rates
fig, ax = plt.subplots(figsize=(10,5))
ax.plot(gdp['Year'], gdp['Nominal_Growth_%'], marker='o', label='Nominal Growth %')
ax.plot(gdp['Year'], gdp['Real_Growth_%'], marker='o', label='Real Growth %')
ax.set_title('Nominal vs Real GDP Growth (%)'); ax.set_xlabel('Year'); ax.set_ylabel('Growth Rate (%)')
ax.axhline(0, linestyle='--', color='grey', linewidth=0.7)
ax.legend()
savefig(fig, "growth_rates_nominal_vs_real.png")
plt.close(fig)

# 7.3 Growth gap
fig, ax = plt.subplots(figsize=(10,4))
ax.plot(gdp['Year'], gdp['Growth_Gap_%'], marker='o', linestyle='-')
ax.axhline(0, color='black', linewidth=0.8)
ax.set_title('Gap between Nominal and Real GDP Growth (%)')
ax.set_xlabel('Year'); ax.set_ylabel('Gap (%)')
savefig(fig, "growth_gap_inflation_effect.png")
plt.close(fig)

# 7.4 Deflator vs CPI (if CPI present)
if 'CPI_Combined' in gdp.columns and not gdp['CPI_Combined'].isna().all():
    fig, ax1 = plt.subplots(figsize=(10,5))
    ax1.plot(gdp['Year'], gdp['Deflator'], marker='o', label='Implied GDP Deflator')
    ax1.set_xlabel('Year'); ax1.set_ylabel('Deflator (index)')
    ax2 = ax1.twinx()
    ax2.plot(gdp['Year'], gdp['CPI_Combined'], marker='x', linestyle='--', label='CPI (Combined)', alpha=0.9)
    ax2.set_ylabel('CPI (index)')
    ax1.set_title('Implied GDP Deflator vs CPI (Combined)')
    lines1, labels1 = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax1.legend(lines1 + lines2, labels1 + labels2, loc='upper left')
    savefig(fig, "deflator_vs_cpi.png")
    plt.close(fig)
else:
    print("Skipping deflator vs CPI plot (CPI not available).")

# 7.5 Expenditure stacked area (if components exist)
comp_cols = []
labels = []
for col in ['PFCE_pctGDP', 'GFCE_pctGDP', 'GFCF_pctGDP', 'Net_Exports_pctGDP']:
    if col in gdp.columns:
        comp_cols.append(col)
        labels.append(col.replace('_pctGDP',''))

if comp_cols:
    fig, ax = plt.subplots(figsize=(10,6))
    x = gdp['Year'].values
    y = gdp[comp_cols].fillna(0).values.T
    ax.stackplot(x, *y, labels=labels)
    ax.set_title('Expenditure composition (% of Nominal GDP)')
    ax.set_xlabel('Year'); ax.set_ylabel('% of GDP')
    ax.legend(loc='upper left')
    savefig(fig, "expenditure_composition_stacked_area.png")
    plt.close(fig)
else:
    print("Not enough expenditure columns to draw stacked composition chart.")


Saved: outputs\nominal_vs_real_gdp.png
Saved: outputs\growth_rates_nominal_vs_real.png
Saved: outputs\growth_gap_inflation_effect.png
Saved: outputs\deflator_vs_cpi.png
Saved: outputs\expenditure_composition_stacked_area.png


In [30]:
# 8_export_and_summary.py
# Save CSV
csv_out = out_dir / "overview_gdp_panel.csv"
gdp.to_csv(csv_out, index=False)
print("Saved merged dataset to:", csv_out)

# Header KPIs (latest year)
latest = gdp['Year'].max()
row_latest = gdp[gdp['Year'] == latest].iloc[0]

def fmt(x, fmt_num="{:,.0f}", pct=False):
    try:
        if pd.isna(x):
            return "N/A"
        return (fmt_num.format(x) + ("%" if pct else ""))
    except:
        return str(x)

kpis = {
    'Year': latest,
    'Nominal_GDP': row_latest.get('Nominal_GDP', np.nan),
    'Real_GDP': row_latest.get('Real_GDP', np.nan),
    'Nominal_Growth_%': row_latest.get('Nominal_Growth_%', np.nan),
    'Real_Growth_%': row_latest.get('Real_Growth_%', np.nan),
    'Growth_Gap_%': row_latest.get('Growth_Gap_%', np.nan),
    'Deflator': row_latest.get('Deflator', np.nan),
    'CPI_Combined': row_latest.get('CPI_Combined', np.nan),
    'CPI_YoY_%': row_latest.get('CPI_YoY_%', np.nan)
}

# 3-year averages (if enough data)
three_yr = gdp[gdp['Year'] >= (latest - 2)]
avg_nom = three_yr['Nominal_Growth_%'].mean()
avg_real = three_yr['Real_Growth_%'].mean()
avg_gap = avg_nom - avg_real

summary_text = f"""
Summary (Overview) — Years {gdp['Year'].min()}–{gdp['Year'].max()} (latest: {latest})

In {latest}, nominal GDP was ₹{fmt(kpis['Nominal_GDP'], '{:,.0f}')}, and real GDP (constant prices) was ₹{fmt(kpis['Real_GDP'], '{:,.0f}')}.
Nominal GDP growth: {fmt(kpis['Nominal_Growth_%'], '{:+.2f}', pct=True)}; Real GDP growth: {fmt(kpis['Real_Growth_%'], '{:+.2f}', pct=True)}.
Nominal-minus-real gap (inflation contribution): {fmt(kpis['Growth_Gap_%'], '{:+.2f}', pct=True)}.
Implied GDP deflator (index): {fmt(kpis['Deflator'], '{:.2f}')}; CPI (Combined): {fmt(kpis['CPI_Combined'], '{:.2f}')}, YoY CPI: {fmt(kpis['CPI_YoY_%'], '{:+.2f}', pct=True)}.

Three-year averages: nominal growth ≈ {avg_nom:.2f}%, real growth ≈ {avg_real:.2f}%, average gap ≈ {avg_gap:.2f}%.
Expenditure composition (where available) shows private consumption (PFCE) as the largest share, investment recovering after the 2020 dip, and net exports generally negative.

Charts saved: nominal_vs_real_gdp.png, growth_rates_nominal_vs_real.png, growth_gap_inflation_effect.png,
deflator_vs_cpi.png (if CPI present), expenditure_composition_stacked_area.png (if components present).

Note: some optional files/components may be missing — verify p3 (component rates) and p9/p10 (per-capita) for additional KPIs.
""".strip()

# print and save
print(summary_text)
with open(out_dir / "overview_summary.txt", "w", encoding="utf-8") as f:
    f.write(summary_text)
print("Saved summary text to:", out_dir / "overview_summary.txt")


Saved merged dataset to: outputs\overview_gdp_panel.csv
Summary (Overview) — Years 2012–2024 (latest: 2024)

In 2024, nominal GDP was ₹30,122,956, and real GDP (constant prices) was ₹17,650,591.
Nominal GDP growth: +12.02%; Real GDP growth: +9.19%.
Nominal-minus-real gap (inflation contribution): +2.83%.
Implied GDP deflator (index): 170.66; CPI (Combined): 197.34, YoY CPI: +5.36%.

Three-year averages: nominal growth ≈ 14.94%, real growth ≈ 8.83%, average gap ≈ 6.11%.
Expenditure composition (where available) shows private consumption (PFCE) as the largest share, investment recovering after the 2020 dip, and net exports generally negative.

Charts saved: nominal_vs_real_gdp.png, growth_rates_nominal_vs_real.png, growth_gap_inflation_effect.png,
deflator_vs_cpi.png (if CPI present), expenditure_composition_stacked_area.png (if components present).

Note: some optional files/components may be missing — verify p3 (component rates) and p9/p10 (per-capita) for additional KPIs.
Saved summar

In [31]:
# 9_optional_extra.py
# Per-capita: try p9/p10
per_capita = None
for cand_idx in [9,10]:
    df = dfs.get(cand_idx)
    if df is not None:
        # attempt to find a likely per-capita column
        choices = [c for c in df.columns if 'Per' in c or 'per' in c or 'Per Capita' in c or 'Per_Capita' in c]
        if choices:
            col = choices[0]
            df2 = df[['End of period', col]].rename(columns={'End of period':'Year', col:'Per_Capita'})
            df2['Year'] = df2['Year'].astype(int)
            merged = pd.merge(gdp[['Year']], df2, on='Year', how='left')
            per_capita = merged.loc[merged['Year'] == gdp['Year'].max(), 'Per_Capita'].values
            if len(per_capita)>0:
                per_capita = per_capita[0]
                print("Found per-capita in p", cand_idx, "-", col)
                break

# GCF/GDP ratio using p2 or p6 if they contain GCF or Gross Capital Formation
gcf_ratio = None
for cand_idx in [2,6]:
    df = dfs.get(cand_idx)
    if df is not None and 'GCF' in df.columns:
        df2 = df[['End of period','GCF']].rename(columns={'End of period':'Year'})
        df2['Year'] = df2['Year'].astype(int)
        merged = pd.merge(gdp[['Year','Nominal_GDP']], df2, on='Year', how='left')
        merged['GCF_to_GDP_pct'] = merged['GCF'] / merged['Nominal_GDP'] * 100
        gcf_ratio = merged.loc[merged['Year']==gdp['Year'].max(), 'GCF_to_GDP_pct'].values
        if len(gcf_ratio)>0:
            gcf_ratio = gcf_ratio[0]
            print("Computed GCF/GDP from p", cand_idx)
            break

print("Per capita (if found):", per_capita)
print("GCF/GDP (if found):", gcf_ratio)


Found per-capita in p 9 - Start of period
Per capita (if found): 2023
GCF/GDP (if found): None
