In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [66]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import cross_val_score

In [67]:
# Step 1: Load the data
df = pd.read_csv('/content/drive/MyDrive/df.csv')
df.head()

Unnamed: 0,date,hour,orders
0,2021-11-19,8.0,1
1,2021-11-19,9.0,16
2,2021-11-19,10.0,4
3,2021-11-19,11.0,14
4,2021-11-19,12.0,2


In [68]:
# Extract date components
df['year'] = df['date'].apply(lambda x: int(x.split('-')[0]))
df['month'] = df['date'].apply(lambda x: int(x.split('-')[1]))
df['day'] = df['date'].apply(lambda x: int(x.split('-')[2]))

# Prepare data
X = df[['year', 'month', 'day', 'hour']]
y = df['orders']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

model = LinearRegression()
model.fit(X_train, y_train)

# Predict on test data
y_pred = model.predict(X_test)

# Calculate RMSE
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print('RMSE:', rmse)

# Calculate other metrics like MAE
mae = mean_absolute_error(y_test, y_pred)
print('MAE:', mae)

# Cross validation
cv_scores = cross_val_score(model, X_train, y_train, cv=3)

print("Cross-validation scores: ", cv_scores)
print("Average cross-validation score: ", np.mean(cv_scores))

# Make predictions
dates = ['2022-02-16', '2022-02-17', '2022-02-18']
hours = range(8,23)

# Imports
from itertools import product

# Cross join function
def cross_join(a, b):
    return list(product(a, b))

# Create dataframe
X_new = pd.DataFrame(cross_join(dates, hours))
X_new.columns = ['date', 'hour']

# Extract date features
X_new['year'] = X_new['date'].apply(lambda x: int(x.split('-')[0]))
X_new['month'] = X_new['date'].apply(lambda x: int(x.split('-')[1]))
X_new['day'] = X_new['date'].apply(lambda x: int(x.split('-')[2]))

# Reorder columns
X_new = X_new[['year', 'month', 'day', 'hour']]

y_pred = model.predict(X_new)

# Save predictions
predictions = X_new.copy()
predictions['orders'] = y_pred
predictions.to_csv('predictions.csv', index=False)

print('Predictions saved to predictions.csv')

RMSE: 7.350178543119819
MAE: 6.125933532935622
Cross-validation scores:  [0.03721981 0.02776894 0.03303146]
Average cross-validation score:  0.03267340410192843
Predictions saved to predictions.csv
