In [7]:
import pandas as pd

# Load the dataset
file_path = r'C:\Users\HRUSHIKESH\Downloads\India_GDP_1960-2022.csv'  
df = pd.read_csv(file_path)

# Display the first few rows
print(df.head())


   Year  GDP in (Billion) $  Per Capita in rupees  Growth %
0  2021             3173.40                182160      8.95
1  2020             2667.69                154640     -6.60
2  2019             2831.55                165760      3.74
3  2018             2702.93                159840      6.45
4  2017             2651.47                158480      6.80


In [8]:
X = df[['Year', 'Per Capita in rupees', 'Growth %']]
y = df['GDP in (Billion) $']

In [9]:
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd

# Assuming X and y are already defined

# Step 1: Scale the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 2: Add polynomial features (e.g., degree 2)
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X_scaled)

# Step 3: Split into training and testing sets
X_train_poly, X_test_poly, y_train, y_test = train_test_split(X_poly, y, test_size=0.2, random_state=42)

# Step 4: Train Lasso Regression with polynomial features
lasso_model = Lasso(alpha=0.1, max_iter=10000, random_state=42)  
lasso_model.fit(X_train_poly, y_train)

# Step 5: Evaluate the model
y_pred_poly = lasso_model.predict(X_test_poly)
print("R-squared (Lasso):", r2_score(y_test, y_pred_poly))
print("MSE (Lasso):", mean_squared_error(y_test, y_pred_poly))

# Step 6: Predict GDP for new data 
new_data = pd.DataFrame({'Year': [2025], 'Per Capita in rupees': [155320], 'Growth %': [6]})

# Step 7: Scale the new data using the fitted scaler
new_data_scaled = scaler.transform(new_data)

# Step 8: Add polynomial features to the scaled new data
new_data_poly = poly.transform(new_data_scaled)

# Step 9: Predict GDP for the new data
predicted_gdp_poly = lasso_model.predict(new_data_poly)
print("Predicted GDP (Lasso):", predicted_gdp_poly[0])


R-squared (Lasso): 0.9999695301704763
MSE (Lasso): 30.179343424734256
Predicted GDP (Lasso): 2861.2628936383444


In [11]:
df['years_since_start'] = df['Year'] - df['Year'].min()
X = df[['years_since_start', 'Per Capita in rupees', 'Growth %']]

In [12]:
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd

# Assuming X and y are already defined

# Step 1: Scale the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 2: Add polynomial features 
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X_scaled)

# Step 3: Split into training and testing sets
X_train_poly, X_test_poly, y_train, y_test = train_test_split(X_poly, y, test_size=0.2, random_state=42)

# Step 4: Train Lasso Regression with polynomial features
lasso_model = Lasso(alpha=0.1, max_iter=10000, random_state=42)  
lasso_model.fit(X_train_poly, y_train)

# Step 5: Evaluate the model
y_pred_poly = lasso_model.predict(X_test_poly)
print("R-squared (Lasso):", r2_score(y_test, y_pred_poly))
print("MSE (Lasso):", mean_squared_error(y_test, y_pred_poly))

# Step 6: Predict GDP for new data 
new_data = pd.DataFrame({'years_since_start': [2025], 'Per Capita in rupees': [155320], 'Growth %': [6]})

# Step 7: Scale the new data using the fitted scaler
new_data_scaled = scaler.transform(new_data)

# Step 8: Add polynomial features to the scaled new data
new_data_poly = poly.transform(new_data_scaled)

# Step 9: Predict GDP for the new data
predicted_gdp_poly = lasso_model.predict(new_data_poly)
print("Predicted GDP (Lasso):", predicted_gdp_poly[0])


R-squared (Lasso): 0.9999695301704763
MSE (Lasso): 30.179343424734256
Predicted GDP (Lasso): 169639.97702296247


In [14]:
X = df[['Year', 'Per Capita in rupees', 'Growth %']]
y = df['GDP in (Billion) $']

In [None]:
#the following is with cross validation

In [16]:
from sklearn.linear_model import Lasso
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
import pandas as pd



# Step 1: Scale the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 2: Add polynomial features (e.g., degree 2)
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X_scaled)

# Step 3: Initialize the Lasso model
lasso_model = Lasso(alpha=0.1, max_iter=10000, random_state=42)  # Adjust alpha as needed

# Step 4: Perform Cross-Validation
cv_scores = cross_val_score(lasso_model, X_poly, y, cv=5, scoring='r2')  # 5-fold CV
cv_predictions = cross_val_predict(lasso_model, X_poly, y, cv=5)

# Step 5: Calculate Cross-Validation Metrics
cv_mse = mean_squared_error(y, cv_predictions)
cv_r2 = r2_score(y, cv_predictions)

print("Average Cross-Validation R-squared:", np.mean(cv_scores))
print("Cross-Validation MSE:", cv_mse)

# Step 6: Train the Lasso model on the full dataset
lasso_model.fit(X_poly, y)

# Step 7: Predict GDP for new data 
new_data = pd.DataFrame({'Year': [2025], 'Per Capita in rupees': [155320], 'Growth %': [6]})
new_data_scaled = scaler.transform(new_data)
new_data_poly = poly.transform(new_data_scaled)

predicted_gdp_poly = lasso_model.predict(new_data_poly)
print("Predicted GDP (Lasso with Cross-Validation):", predicted_gdp_poly[0])


Average Cross-Validation R-squared: 0.8097802261978544
Cross-Validation MSE: 187.6585517462532
Predicted GDP (Lasso with Cross-Validation): 2858.8594964179915
