In [115]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Read the house-sales table from disk
data = pd.read_csv('/content/sample_data/kc_house_data.csv')

# Parse the sale timestamp once, then replace the raw 'date' column with
# numeric year / month / day columns appended at the end of the frame
sale_date = pd.to_datetime(data['date'], format='%Y%m%dT%H%M%S')
data = data.drop(columns='date').assign(
    year=sale_date.dt.year,
    month=sale_date.dt.month,
    day=sale_date.dt.day,
)

# Report the per-column count of missing values, then discard incomplete rows
null_values = data.isna().sum()
print(null_values)
data = data.dropna()

# Target is the sale price; features are everything except the price and
# the row identifier
y = data['price']
X = data.drop(columns=['price', 'id'])

# Hold out 20% of the rows for evaluation (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit an ordinary least-squares regressor on the training portion
model = LinearRegression().fit(X_train, y_train)

# Score the fitted model on the held-out rows
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Absolute Error: {mae}')
print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

# Prompt table for predict_house_price: (feature, prompt, converter, min, max).
# Rows are listed in the order of the dataset's feature columns; a bound of
# None means that side of the range is not checked.
_HOUSE_FEATURE_PROMPTS = (
    ('bedrooms', "Enter the number of bedrooms: ", float, None, None),
    ('bathrooms', "Enter the number of bathrooms: ", float, None, None),
    ('sqft_living', "Enter the square footage of the living area: ", float, None, None),
    ('sqft_lot', "Enter the square footage of the lot: ", float, None, None),
    ('floors', "Enter the number of floors: ", float, None, None),
    ('waterfront', "Enter 1 if the house is waterfront, else 0: ", int, 0, 1),
    ('view', "Enter the view rating (0-4): ", int, 0, 4),
    ('condition', "Enter the condition rating (1-5): ", int, 1, 5),
    ('grade', "Enter the grade (1-13): ", int, 1, 13),
    ('sqft_above', "Enter the square footage of the above area: ", float, None, None),
    ('sqft_basement', "Enter the square footage of the basement: ", float, None, None),
    ('yr_built', "Enter the year the house was built: ", int, None, None),
    ('yr_renovated', "Enter the year the house was renovated (0 if never): ", int, None, None),
    ('zipcode', "Enter the zipcode: ", int, None, None),
    ('lat', "Enter the latitude: ", float, None, None),
    ('long', "Enter the longitude: ", float, None, None),
    ('sqft_living15', "Enter the square footage of the living area for the nearest 15 neighbors: ", float, None, None),
    ('sqft_lot15', "Enter the square footage of the lot for the nearest 15 neighbors: ", float, None, None),
    ('year', "Enter the year: ", int, None, None),
    ('month', "Enter the month: ", int, 1, 12),
    ('day', "Enter the day: ", int, 1, 31),
)


def _prompt_number(prompt, convert, lower, upper, input_fn):
    """Ask with *prompt* until the reply converts and lies within the bounds."""
    import math  # local import: keeps this block self-contained

    expected = 'a whole number' if convert is int else 'a number'
    while True:
        reply = input_fn(prompt)
        try:
            value = convert(reply)
        except ValueError:
            print(f'Invalid input {reply!r}: please enter {expected}.')
            continue
        # float() accepts "nan" and "inf", which no regressor can use
        if isinstance(value, float) and not math.isfinite(value):
            print(f'Invalid input {reply!r}: please enter a finite number.')
            continue
        if lower is not None and value < lower:
            print(f'Value must be at least {lower}.')
            continue
        if upper is not None and value > upper:
            print(f'Value must be at most {upper}.')
            continue
        return value


# Function to predict house price based on user input
def predict_house_price(model, input_fn=None):
    """Prompt for one house's features, print and return the predicted price.

    Each feature is requested in turn; a reply that is not a valid number, or
    that falls outside the range stated in its prompt, is rejected with a
    message and asked again instead of aborting the whole session.

    The feature frame is laid out in the column order the model recorded when
    it was fitted (``model.feature_names_in_``), so the function does not
    depend on any module-level training data. If the model carries no such
    attribute, the prompt order is used.

    Args:
        model: Fitted estimator exposing ``predict(frame)``.
        input_fn: Callable taking the prompt text and returning the reply
            string. Defaults to the builtin ``input``; supply another callable
            to drive the function non-interactively.

    Returns:
        The predicted price for the described house, as a ``float``.

    Raises:
        ValueError: If the model expects a feature this function does not
            prompt for.
    """
    # Resolve the default at call time so an interpreter that swaps out the
    # builtin after this definition is still honoured
    if input_fn is None:
        input_fn = input

    # Collect user inputs
    values = {
        name: _prompt_number(prompt, convert, lower, upper, input_fn)
        for name, prompt, convert, lower, upper in _HOUSE_FEATURE_PROMPTS
    }

    # Use the column order the model itself was fitted with
    feature_names = getattr(model, 'feature_names_in_', None)
    if feature_names is None:
        feature_names = list(values)

    missing = [name for name in feature_names if name not in values]
    if missing:
        # Fail loudly rather than feeding the model silently-NaN columns
        raise ValueError(f'Model expects features that are not collected: {missing}')

    # Create a single-row DataFrame for the new house
    new_house = pd.DataFrame({name: [values[name]] for name in feature_names})

    # Predict the price for the new house
    predicted_price = model.predict(new_house)
    print(f'Predicted Price: {predicted_price[0]}')
    return float(predicted_price[0])

# Run one interactive prediction: the function prompts for each feature value
# via input() and prints the fitted model's predicted price
predict_house_price(model)


id               0
price            0
bedrooms         0
bathrooms        0
sqft_living      0
sqft_lot         0
floors           0
waterfront       0
view             0
condition        0
grade            0
sqft_above       0
sqft_basement    0
yr_built         0
yr_renovated     0
zipcode          0
lat              0
long             0
sqft_living15    0
sqft_lot15       0
year             0
month            0
day              0
dtype: int64
Mean Absolute Error: 126929.17347036298
Mean Squared Error: 44951491944.93018
R-squared: 0.7026559760834366
Enter the number of bedrooms: 3
Enter the number of bathrooms: 3
Enter the square footage of the living area: 1500
Enter the square footage of the lot: 3000
Enter the number of floors: 1
Enter 1 if the house is waterfront, else 0: 0
Enter the view rating (0-4): 0
Enter the condition rating (1-5): 2
Enter the grade (1-13): 5
Enter the square footage of the above area: 1500
Enter the square footage of the basement: 300
Enter the year the ho