In [11]:
import pandas as pd

# Load the dataset
file_path = r'C:\Users\HRUSHIKESH\Downloads\India_GDP_1960-2022.csv'  
df = pd.read_csv(file_path)

# Display the first few rows
print(df.head())


   Year  GDP in (Billion) $  Per Capita in rupees  Growth %
0  2021             3173.40                182160      8.95
1  2020             2667.69                154640     -6.60
2  2019             2831.55                165760      3.74
3  2018             2702.93                159840      6.45
4  2017             2651.47                158480      6.80


In [12]:
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score


# Prepare features (X) and target (y)
X = df[['Year', 'Per Capita in rupees', 'Growth %']]
y = df['GDP in (Billion) $']

# Step 1: Split the data into training and testing sets (80-20 split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 2: Apply StandardScaler for feature scaling
scaler = StandardScaler()

# Fit the scaler on the training data and transform both training and testing data
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 3: Initialize and train the Ridge Regression model
ridge_model = Ridge(alpha=1.0)  

# Step 4: Perform cross-validation to evaluate model's generalization
cv_scores = cross_val_score(ridge_model, X_train_scaled, y_train, cv=5, scoring='neg_mean_squared_error')

# Print average cross-validation score (MSE)
print(f"Average Cross-Validation MSE: {-cv_scores.mean()}")

# Step 5: Train the model on the full training data
ridge_model.fit(X_train_scaled, y_train)

# Step 6: Make predictions on the test set
y_pred = ridge_model.predict(X_test_scaled)

# Step 7: Evaluate model performance
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f"R-squared: {r2}")
print(f"Mean Squared Error: {mse}")

# Step 8: Predict GDP for new data 
new_data = pd.DataFrame({'Year': [2025], 'Per Capita in rupees': [155320], 'Growth %': [6]})

# Scale the new data using the fitted scaler
new_data_scaled = scaler.transform(new_data)

# Predict GDP for the new data point
predicted_gdp = ridge_model.predict(new_data_scaled)
print(f"Predicted GDP for Year 2025, Per Capita 2000, Growth 5%: {predicted_gdp[0]}")


Average Cross-Validation MSE: 4166.231142850316
R-squared: 0.9955099486834471
Mean Squared Error: 4447.245120669813
Predicted GDP for Year 2025, Per Capita 2000, Growth 5%: 2528.2171530578134


In [14]:
df['years_since_start'] = df['Year'] - df['Year'].min()
X = df[['years_since_start', 'Per Capita in rupees', 'Growth %']]

In [15]:
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score


# Prepare features (X) and target (y)
X = df[['years_since_start', 'Per Capita in rupees', 'Growth %']]
y = df['GDP in (Billion) $']

# Step 1: Split the data into training and testing sets (80-20 split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 2: Apply StandardScaler for feature scaling
scaler = StandardScaler()

# Fit the scaler on the training data and transform both training and testing data
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 3: Initialize and train the Ridge Regression model
ridge_model = Ridge(alpha=1.0)  

# Step 4: Perform cross-validation to evaluate model's generalization
cv_scores = cross_val_score(ridge_model, X_train_scaled, y_train, cv=5, scoring='neg_mean_squared_error')

# Print average cross-validation score (MSE)
print(f"Average Cross-Validation MSE: {-cv_scores.mean()}")

# Step 5: Train the model on the full training data
ridge_model.fit(X_train_scaled, y_train)

# Step 6: Make predictions on the test set
y_pred = ridge_model.predict(X_test_scaled)

# Step 7: Evaluate model performance
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f"R-squared: {r2}")
print(f"Mean Squared Error: {mse}")

# Step 8: Predict GDP for new data 
new_data = pd.DataFrame({'years_since_start': [2025], 'Per Capita in rupees': [155320], 'Growth %': [6]})

# Scale the new data using the fitted scaler
new_data_scaled = scaler.transform(new_data)

# Predict GDP for the new data point
predicted_gdp = ridge_model.predict(new_data_scaled)
print(f"Predicted GDP for Year 2025, Per Capita 2000, Growth 5%: {predicted_gdp[0]}")


Average Cross-Validation MSE: 4166.231142850311
R-squared: 0.9955099486834471
Mean Squared Error: 4447.245120669823
Predicted GDP for Year 2025, Per Capita 2000, Growth 5%: 3647.723343092628
