In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline


In [5]:
# Load the water-footprint records. The path is relative, so the CSV is expected
# in the notebook's working directory.
DATASET_PATH = "complex_water_footprint_dataset.csv"
data = pd.read_csv(DATASET_PATH)


In [15]:
# Column groups consumed by the preprocessing step. Columns of the input frame that
# are not listed here are not passed to any transformer.
numerical_features = [
    'Number_of_people',
    'Average_Age',
    'Total_Income',
    'House_Size',
    'Number_of_bathrooms',
]
categorical_features = [
    'Occupation',
    'Lifestyle',
    'Presence_of_garden',
    'Dishwasher_usage',
    'Washing_machine_usage',
    'Swimming_pool',
    'Water_storage',
]

# Numeric columns are standardised; categorical columns are one-hot encoded with the
# first level of each feature dropped.
# NOTE(review): with handle_unknown='ignore', a category unseen during fit is encoded
# as all zeros, which is the same encoding as the dropped first level — confirm this
# is acceptable for the inputs expected at prediction time.
numeric_step = ('num', StandardScaler(), numerical_features)
categorical_step = (
    'cat',
    OneHotEncoder(drop='first', handle_unknown='ignore'),
    categorical_features,
)

preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])



In [16]:
# Chain preprocessing and the regressor into a single estimator so that fit() and
# predict() always apply the same transformations to the raw columns.
pipeline_steps = [
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression()),
]
model = Pipeline(steps=pipeline_steps)


In [17]:
# Separate the regression target from the predictors, then hold out 20% of the rows
# for evaluation.
y = data['Water_footprint']
X = data.drop(columns=['Water_footprint'])

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,  # fixed seed so the split is the same on every run
)


In [18]:
# Fit every pipeline step (scaler, encoder, regressor) on the training split only.
model.fit(X=X_train, y=y_train)


In [19]:
# Score the fitted pipeline on the held-out rows.
y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # same units as the target, easier to interpret than MSE
r2 = r2_score(y_test, y_pred)

# Report each metric on its own line as "<label>: <value>".
metrics = {
    "Mean Squared Error": mse,
    "Root Mean Squared Error": rmse,
    "R2 Score": r2,
}
for metric_name, metric_value in metrics.items():
    print(f"{metric_name}: {metric_value}")


Mean Squared Error: 41730.70724265732
Root Mean Squared Error: 204.28095173720266
R2 Score: 0.9969484918530892


In [20]:
import joblib

# Persist the whole fitted pipeline (preprocessing + regressor) in one file so it can
# be reloaded later without refitting.
# NOTE: joblib files are pickle-based — only load them from trusted sources.
MODEL_PATH = "lr.pkl"
joblib.dump(model, MODEL_PATH)


['lr.pkl']

In [21]:
# Disabled earlier draft of the interactive prompt, kept as a bare string literal so
# none of it executes. It asked for the category labels as free text; the next cell
# defines get_user_input() again using numbered menus instead.
'''
def get_user_input():
    user_data = {}
    user_data['Number_of_people'] = int(input("Enter number of people: "))
    user_data['Average_Age'] = float(input("Enter average age: "))
    user_data['Total_Income'] = float(input("Enter total income: "))
    user_data['Occupation'] = input("Enter occupation (Salaried/Business/Retired): ")
    user_data['Lifestyle'] = input("Enter lifestyle (Minimalist/Luxury): ")
    user_data['House_Size'] = float(input("Enter house size: "))
    user_data['Presence_of_garden'] = input("Is there a garden? (Yes/No): ")
    user_data['Number_of_bathrooms'] = int(input("Enter number of bathrooms: "))
    user_data['Dishwasher_usage'] = input("Enter dishwasher usage (Never/Daily/Weekly): ")
    user_data['Washing_machine_usage'] = input("Enter washing machine usage (Never/Daily/Weekly/Rarely): ")
    user_data['Swimming_pool'] = input("Is there a swimming pool? (Yes/No): ")
    user_data['Water_storage'] = input("Enter water storage (Well/Direct Supply/Tank): ")

    return pd.DataFrame([user_data])

user_input = get_user_input()
predicted_water_footprint = model.predict(user_input)
print(f"Predicted Water Footprint: {predicted_water_footprint[0]}")
'''

'\ndef get_user_input():\n    user_data = {}\n    user_data[\'Number_of_people\'] = int(input("Enter number of people: "))\n    user_data[\'Average_Age\'] = float(input("Enter average age: "))\n    user_data[\'Total_Income\'] = float(input("Enter total income: "))\n    user_data[\'Occupation\'] = input("Enter occupation (Salaried/Business/Retired): ")\n    user_data[\'Lifestyle\'] = input("Enter lifestyle (Minimalist/Luxury): ")\n    user_data[\'House_Size\'] = float(input("Enter house size: "))\n    user_data[\'Presence_of_garden\'] = input("Is there a garden? (Yes/No): ")\n    user_data[\'Number_of_bathrooms\'] = int(input("Enter number of bathrooms: "))\n    user_data[\'Dishwasher_usage\'] = input("Enter dishwasher usage (Never/Daily/Weekly): ")\n    user_data[\'Washing_machine_usage\'] = input("Enter washing machine usage (Never/Daily/Weekly/Rarely): ")\n    user_data[\'Swimming_pool\'] = input("Is there a swimming pool? (Yes/No): ")\n    user_data[\'Water_storage\'] = input("Enter

In [22]:
def _ask_number(prompt, cast):
    """Prompt with ``prompt`` until the reply parses with ``cast`` (``int`` or ``float``)."""
    while True:
        try:
            return cast(input(prompt))
        except ValueError:
            print("Invalid number, please try again.")


def _ask_choice(prompt, options, show_menu=True):
    """Prompt until the reply is a valid 1-based index into ``options``; return that option."""
    if show_menu:
        print("\n".join(f"{number}. {label}" for number, label in enumerate(options, start=1)))
    while True:
        try:
            number = int(input(prompt))
        except ValueError:
            number = 0  # non-numeric reply: handled like any other out-of-range choice
        if 1 <= number <= len(options):
            return options[number - 1]
        print(f"Invalid choice, please enter a number from 1 to {len(options)}.")


def get_user_input():
    """Interactively collect one household's details as a one-row DataFrame.

    Numeric answers are parsed as ``int``/``float`` and categorical answers are picked
    from numbered menus. Every question is asked again until the reply is valid, so the
    returned frame always contains all twelve columns. Previously an out-of-range menu
    choice left its column out of the frame, and any reply other than ``1`` to a
    two-option question silently selected the second option.

    Returns:
        pandas.DataFrame: a single row with the columns ``Number_of_people``,
        ``Average_Age``, ``Total_Income``, ``Occupation``, ``Lifestyle``,
        ``House_Size``, ``Presence_of_garden``, ``Number_of_bathrooms``,
        ``Dishwasher_usage``, ``Washing_machine_usage``, ``Swimming_pool`` and
        ``Water_storage``, in that order.
    """
    yes_no = ('Yes', 'No')
    user_data = {}

    user_data['Number_of_people'] = _ask_number("Enter number of people: ", int)
    user_data['Average_Age'] = _ask_number("Enter average age: ", float)
    user_data['Total_Income'] = _ask_number("Enter total income: ", float)

    user_data['Occupation'] = _ask_choice(
        "Enter your occupation choice: ", ('Salaried', 'Business', 'Retired'))
    user_data['Lifestyle'] = _ask_choice(
        "Enter your lifestyle choice: ", ('Minimalist', 'Luxury'))

    user_data['House_Size'] = _ask_number("Enter house size: ", float)

    # The yes/no prompts already list their options inline, so no separate menu is printed.
    user_data['Presence_of_garden'] = _ask_choice(
        "Is there a garden? 1. Yes 2. No: ", yes_no, show_menu=False)

    user_data['Number_of_bathrooms'] = _ask_number("Enter number of bathrooms: ", int)

    user_data['Dishwasher_usage'] = _ask_choice(
        "Enter dishwasher usage choice: ", ('Never', 'Daily', 'Weekly'))
    user_data['Washing_machine_usage'] = _ask_choice(
        "Enter washing machine usage choice: ", ('Never', 'Daily', 'Weekly', 'Rarely'))

    user_data['Swimming_pool'] = _ask_choice(
        "Is there a swimming pool? 1. Yes 2. No: ", yes_no, show_menu=False)

    user_data['Water_storage'] = _ask_choice(
        "Enter water storage choice: ", ('Well', 'Direct Supply', 'Tank'))

    return pd.DataFrame([user_data])

# Collect one household's answers interactively and score them with the fitted
# pipeline, which applies the preprocessing and the regression together.
# NOTE(review): the prediction recorded for this cell (~5.3e14) is far outside what the
# test RMSE (~204) would suggest — check the values that were entered and the fitted
# coefficients before trusting interactive predictions.
user_input = get_user_input()
predicted_water_footprint = model.predict(X=user_input)
first_prediction = predicted_water_footprint[0]
print(f"Predicted Water Footprint: {first_prediction}")


1. Salaried
2. Business
3. Retired
1. Minimalist
2. Luxury
1. Never
2. Daily
3. Weekly
1. Never
2. Daily
3. Weekly
4. Rarely
1. Well
2. Direct Supply
3. Tank
Predicted Water Footprint: 533985896339706.7


