In [3]:
# Step 1: Import the necessary libraries
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [4]:
# Step 2: Load online_shoppers_intention.csv (path relative to the working directory)
data = pd.read_csv(filepath_or_buffer='online_shoppers_intention.csv')

In [5]:
# Inspect each column's dtype to see which columns are not yet numeric
data.dtypes

Administrative               int64
Administrative_Duration    float64
Informational                int64
Informational_Duration     float64
ProductRelated               int64
ProductRelated_Duration    float64
BounceRates                float64
ExitRates                  float64
PageValues                 float64
SpecialDay                 float64
Month                       object
OperatingSystems             int64
Browser                      int64
Region                       int64
TrafficType                  int64
VisitorType                 object
Weekend                       bool
Revenue                       bool
dtype: object

In [6]:
# Step 3: Preprocess the data
# 'Revenue' is the label to predict; every other column is kept as a feature
y = data['Revenue']
X = data.drop(columns=['Revenue'])

In [7]:
# One-hot encode the object-dtype feature columns (Month, VisitorType);
# columns of other dtypes are passed through unchanged
X = pd.get_dummies(data=X)

In [8]:
# Step 4: Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Step 5: Train the Random Forest classifier
# The forest is seeded so that its bootstrap sampling and feature sub-sampling are
# repeatable; without a seed the accuracy reported below changes on every run.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

RandomForestClassifier()

In [10]:
# Step 6: Predict the Revenue label for every row of the held-out test features
y_pred = rf_model.predict(X=X_test)

In [11]:
# Step 7: Evaluate the model -- fraction of test rows whose predicted label matches y_test
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.8937550689375506


In [12]:
# Step 8: Predict Revenue using only the 'Weekend' flag
# Pull out the 'Weekend' column; it is used as the single input feature in the cells below
weekend = data.loc[:, 'Weekend']

In [13]:
# Split the Weekend feature and the Revenue target 80/20 with a fixed seed.
# NOTE: this rebinds y_train / y_test, which were first assigned by the full-feature split.
weekend_train, weekend_test, y_train, y_test = train_test_split(
    weekend,
    y,
    test_size=0.2,
    random_state=42,
)


In [14]:
# Train a Random Forest classifier that predicts Revenue from the Weekend flag alone.
# Seeded so the fitted model and its reported accuracy are repeatable across runs.
weekend_model = RandomForestClassifier(random_state=42)
# scikit-learn estimators take a 2-D feature matrix, so the 1-D column is reshaped
# to (n_samples, 1) before fitting.
weekend_model.fit(weekend_train.to_numpy().reshape(-1, 1), y_train)

RandomForestClassifier()

In [15]:
# Predict Revenue for the held-out rows; the 1-D Weekend column is reshaped to (n_samples, 1)
weekend_pred = weekend_model.predict(
    weekend_test.values.reshape(-1, 1)
)


In [16]:
# Score the Weekend-only model: fraction of test rows whose predicted label matches y_test
weekend_accuracy = accuracy_score(y_true=y_test, y_pred=weekend_pred)
print("Weekend Accuracy:", weekend_accuracy)

Weekend Accuracy: 0.8333333333333334


In [17]:
# Step 9: Random Forest regression driven by 'Informational_Duration'
# NOTE(review): in the cells that follow, this column is the input feature and Revenue
# (y) is the target -- the duration itself is not what gets predicted.
info_duration = data.loc[:, 'Informational_Duration']


In [18]:
# Split the Informational_Duration feature and the Revenue target 80/20 with a fixed seed.
# NOTE: this rebinds y_train / y_test again.
info_train, info_test, y_train, y_test = train_test_split(
    info_duration,
    y,
    test_size=0.2,
    random_state=42,
)

In [19]:
# Train the Random Forest regressor.
# The only feature is Informational_Duration and the target is y_train (Revenue), so the
# model outputs a Revenue score per row -- it does NOT predict a duration. If the goal is
# to predict Informational_Duration, that column must become the target instead.
# Seeded so the fitted forest is repeatable across runs.
rf_regressor = RandomForestRegressor(random_state=42)
# scikit-learn estimators take a 2-D feature matrix: reshape the column to (n_samples, 1)
rf_regressor.fit(info_train.to_numpy().reshape(-1, 1), y_train)

# Make predictions on the test set
info_pred = rf_regressor.predict(info_test.to_numpy().reshape(-1, 1))

# Step 10: Print the predictions, labelled as what they are (Revenue scores, not durations)
print("Predicted Revenue score from Informational_Duration:")
print(info_pred)

<IPython.core.display.Javascript object>

Predicted Informational_Duration:
[0.13289952 0.04333333 0.13289952 ... 0.22690975 0.01471251 0.13289952]
