# XGB Regressor
XGBRegressor is a class in the Python library XGBoost (Extreme Gradient Boosting) which is used for regression problems. It is an implementation of gradient boosting decision trees that uses a tree-based ensemble model to make predictions.

The XGBRegressor class allows you to create an XGBoost model for regression tasks by specifying various hyperparameters. Some of the important hyperparameters that can be set include the number of trees to be built, the depth of the trees, learning rate, and regularization parameters.

After creating an XGBRegressor model, you can fit it to your training data using the fit() method and make predictions on new data using the predict() method.

# Bike Sharing Usage Prediction

In [42]:
#importing library
import numpy as np
#pandas 
import pandas as pd
#plotly.express
import plotly.express as px
#train_test_split
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor

# Loading the Dataset

In [43]:
df=pd.read_csv('/kaggle/input/london-bike-sharing-dataset/london_merged.csv')
#showing the dataset
df

Unnamed: 0,timestamp,cnt,t1,t2,hum,wind_speed,weather_code,is_holiday,is_weekend,season
0,2015-01-04 00:00:00,182,3.0,2.0,93.0,6.0,3.0,0.0,1.0,3.0
1,2015-01-04 01:00:00,138,3.0,2.5,93.0,5.0,1.0,0.0,1.0,3.0
2,2015-01-04 02:00:00,134,2.5,2.5,96.5,0.0,1.0,0.0,1.0,3.0
3,2015-01-04 03:00:00,72,2.0,2.0,100.0,0.0,1.0,0.0,1.0,3.0
4,2015-01-04 04:00:00,47,2.0,0.0,93.0,6.5,1.0,0.0,1.0,3.0
...,...,...,...,...,...,...,...,...,...,...
17409,2017-01-03 19:00:00,1042,5.0,1.0,81.0,19.0,3.0,0.0,0.0,3.0
17410,2017-01-03 20:00:00,541,5.0,1.0,81.0,21.0,4.0,0.0,0.0,3.0
17411,2017-01-03 21:00:00,337,5.5,1.5,78.5,24.0,4.0,0.0,0.0,3.0
17412,2017-01-03 22:00:00,224,5.5,1.5,76.0,23.0,4.0,0.0,0.0,3.0


# Getting the Preliminary Information

In [44]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17414 entries, 0 to 17413
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   timestamp     17414 non-null  object 
 1   cnt           17414 non-null  int64  
 2   t1            17414 non-null  float64
 3   t2            17414 non-null  float64
 4   hum           17414 non-null  float64
 5   wind_speed    17414 non-null  float64
 6   weather_code  17414 non-null  float64
 7   is_holiday    17414 non-null  float64
 8   is_weekend    17414 non-null  float64
 9   season        17414 non-null  float64
dtypes: float64(8), int64(1), object(1)
memory usage: 1.3+ MB


# Preprocessing

In [45]:
def onehot_encode(df,columns):
    df=df.copy()
    for column in columns:
        dummies=pd.get_dummies(df[column],prefix=column)
        df=pd.concat([df,dummies],axis=1)
        df=df.drop(column,axis=1)
    return df

In [46]:
def preprocess_inputs(df):
    df=df.copy()
    df['Month']=pd.to_datetime(df['timestamp']).dt.month
    
    df['Day']=pd.to_datetime(df['timestamp']).dt.day
    df['Hour']=pd.to_datetime(df['timestamp']).dt.hour
    df=df.drop('timestamp',axis=1)
    
    onehot_columns=['weather_code','season']
    df=onehot_encode(df,onehot_columns)
    
    y=df['cnt']
    x=df.drop('cnt',axis=1)
    
    #train_test_split
    x_train,x_test,y_train,y_test=train_test_split(x,y,train_size=0.7)
    
    scaler=StandardScaler()
    scaler.fit(x_train)
    x_train=pd.DataFrame(scaler.transform(x_train),columns=x_train.columns)
    x_test=pd.DataFrame(scaler.transform(x_test),columns=x_test.columns)
    
    
    
    return x_train,x_test,y_train,y_test

In [47]:
x_train,x_test,y_train,y_test=preprocess_inputs(df)
print(x_train.shape)
print(x_train.shape)
print(y_train.shape)
print(y_test.shape)

(12189, 20)
(12189, 20)
(12189,)
(5225,)


In [48]:
x_test

Unnamed: 0,t1,t2,hum,wind_speed,is_holiday,is_weekend,Month,Day,Hour,weather_code_1.0,weather_code_2.0,weather_code_3.0,weather_code_4.0,weather_code_7.0,weather_code_10.0,weather_code_26.0,season_0.0,season_1.0,season_2.0,season_3.0
0,0.988442,0.975282,-0.284432,-1.325121,-0.152492,1.596717,0.140984,-1.221381,-0.656242,1.349398,-0.554321,-0.499641,-0.305581,-0.37061,-0.027183,-0.061548,-0.583255,1.723848,-0.571256,-0.574792
1,-0.985037,-1.141997,0.616013,0.386784,-0.152492,-0.626285,-1.018959,1.619446,-1.377724,1.349398,-0.554321,-0.499641,-0.305581,-0.37061,-0.027183,-0.061548,1.714515,-0.580098,-0.571256,-0.574792
2,-0.177705,-0.007740,-1.635099,-1.261717,-0.152492,-0.626285,-1.018959,0.710382,-0.079056,-0.741071,1.804009,-0.499641,-0.305581,-0.37061,-0.027183,-0.061548,1.714515,-0.580098,-0.571256,-0.574792
3,1.167849,1.126516,0.408218,-0.754486,-0.152492,1.596717,-0.149002,-1.335014,1.075316,-0.741071,-0.554321,-0.499641,3.272451,-0.37061,-0.027183,-0.061548,-0.583255,1.723848,-0.571256,-0.574792
4,1.347257,1.277750,-2.016057,0.006361,-0.152492,-0.626285,0.430970,-0.880482,1.075316,1.349398,-0.554321,-0.499641,-0.305581,-0.37061,-0.027183,-0.061548,-0.583255,1.723848,-0.571256,-0.574792
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5220,-1.702666,-1.520082,1.031603,-1.198314,-0.152492,1.596717,1.300927,0.710382,1.652502,1.349398,-0.554321,-0.499641,-0.305581,-0.37061,-0.027183,-0.061548,-0.583255,-0.580098,1.750530,-0.574792
5221,0.450221,0.521580,1.308663,-0.881294,-0.152492,1.596717,0.720955,0.255849,-0.656242,-0.741071,-0.554321,2.001437,-0.305581,-0.37061,-0.027183,-0.061548,-0.583255,-0.580098,1.750530,-0.574792
5222,2.064885,1.882687,-1.946792,-0.120447,-0.152492,-0.626285,0.430970,0.142216,0.209537,1.349398,-0.554321,-0.499641,-0.305581,-0.37061,-0.027183,-0.061548,-0.583255,1.723848,-0.571256,-0.574792
5223,3.320736,2.941327,-2.223852,0.386784,-0.152492,-0.626285,0.140984,0.369482,0.209537,1.349398,-0.554321,-0.499641,-0.305581,-0.37061,-0.027183,-0.061548,-0.583255,1.723848,-0.571256,-0.574792


# Training the Model

In [49]:
model=XGBRegressor()
model.fit(x_train,y_train)
print(model.score(x_test,y_test))

0.9577302533420575


In [50]:
y_pred=model.predict(x_test)
rmse=np.sqrt(np.mean((y_pred-y_test)**2))
print(rmse)

220.2118919678655


In [51]:
y_pred

array([ 246.89127 ,    7.507941,  812.0829  , ..., 1819.6274  ,
       1968.8503  ,  561.64984 ], dtype=float32)