In [51]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score

In [52]:
# Load the bike-rental dataset; the relative path is resolved against the
# directory the notebook kernel is running in.
df = pd.read_csv(filepath_or_buffer='bike_rentals.csv')

In [53]:
# Print a structural summary of the freshly loaded frame: number of rows,
# column names, non-null counts, dtypes and memory usage. Useful here to
# confirm there are no missing values and to see which columns are non-numeric.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10886 entries, 0 to 10885
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   datetime    10886 non-null  object 
 1   season      10886 non-null  int64  
 2   holiday     10886 non-null  int64  
 3   workingday  10886 non-null  int64  
 4   weather     10886 non-null  int64  
 5   temp        10886 non-null  float64
 6   atemp       10886 non-null  float64
 7   humidity    10886 non-null  int64  
 8   windspeed   10886 non-null  float64
 9   casual      10886 non-null  int64  
 10  registered  10886 non-null  int64  
 11  count       10886 non-null  int64  
dtypes: float64(3), int64(8), object(1)
memory usage: 1020.7+ KB


In [54]:
# Remove columns that must not reach the model as features:
#   - 'datetime' is a raw string column (object dtype), not a numeric feature.
#   - 'casual' and 'registered' are removed so they cannot leak into the inputs
#     (NOTE(review): they look like components of 'count' -- confirm).
# errors='ignore' makes the cell safe to re-run: on a second execution the
# columns are already gone and a plain drop() would raise KeyError.
# NOTE(review): dropping 'datetime' outright also discards any time-of-day or
# calendar signal it carries; consider deriving numeric features from it first.
df = df.drop(columns=['datetime', 'casual', 'registered'], errors='ignore')

In [55]:
# Separate features (X) and target (y) by column NAME rather than by position.
# 'count' is currently the last column, so this yields the same X and y as a
# positional slice, but it keeps working (or fails loudly with KeyError)
# if the column order of df ever changes.
X = df.drop(columns=['count'])  # Features: every column except the target
y = df['count']                 # Target variable: the 'count' column

In [56]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffled split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=10,
)

In [57]:
# Create the XGBRegressor used for the hold-out evaluation.
# The objective is spelled out so this model is visibly configured the same way
# as the one used for cross-validation later in the notebook;
# 'reg:squarederror' is the documented default, so fitted results are unchanged.
xgb_reg = XGBRegressor(objective='reg:squarederror')

In [58]:
# Train the regressor on the training split only; the test split stays unseen.
xgb_reg.fit(X=X_train, y=y_train)

In [59]:
# Predict the target for every row of the held-out test features
y_pred = xgb_reg.predict(X=X_test)

# Show the predicted values (numpy abbreviates long arrays when printing)
print(y_pred)

[152.79662 455.99005 226.87386 ... 283.17352 237.05595 317.5177 ]


In [60]:
# Score the fitted model on the held-out data and report it as R-squared
r_squared = xgb_reg.score(X=X_test, y=y_test)
print("R-squared score:", r_squared)

R-squared score: 0.30952750083616


In [61]:
# Mean Squared Error (MSE) between the true test targets and the predictions
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("Mean Squared Error (MSE): ", mse)

Mean Squared Error (MSE):  23641.809392301926


In [62]:
# Root Mean Squared Error (RMSE): the square root of the MSE computed in the
# previous cell, which expresses the error in the same units as the target.
rmse = np.sqrt(mse)
print("Root Mean Squared Error (RMSE):", rmse)

Root Mean Squared Error (RMSE): 153.75893272360446


In [63]:
# Fresh, unfitted XGBRegressor used only for the cross-validation that follows,
# kept separate from the already-fitted xgb_reg. The squared-error objective is
# stated explicitly; all other hyperparameters are left at their defaults.
model = XGBRegressor(objective='reg:squarederror')

In [64]:
# 10-fold cross-validation over the full dataset. The scorer returns NEGATIVE
# mean squared error (higher is better), so the sign is flipped before use.
# NOTE(review): an integer cv appears to produce contiguous, unshuffled folds,
# unlike the shuffled hold-out split earlier -- confirm this is intended.
scores = cross_val_score(
    estimator=model,
    X=X,
    y=y,
    scoring='neg_mean_squared_error',
    cv=10,
)

In [65]:
# Turn each fold's negative MSE into an RMSE: negate to recover the MSE, then
# take the element-wise square root.
rmse_cv = np.sqrt(np.negative(scores))
print("Cross-validated RMSE scores:", rmse_cv)

Cross-validated RMSE scores: [112.56774515 124.7896452  185.83426857 144.75848887 157.79269603
 136.69771471 200.13917397 200.1678087  218.96740725 189.32863913]
