# In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

# Load the sales data. Alternatively, point file_path at a different URL or
# at a local CSV file to use custom data.
file_path = "https://static.bc-edx.com/mbc/ai/m3/datasets/sales.csv"
df_sales = pd.read_csv(file_path)

# Scatter plot of sales (y axis) against number of ads (x axis).
sales_plot = df_sales.plot(
  kind="scatter",
  x="ads",
  y="sales",
  title="Sales per Number of Ads",
)

#####################
# Variables (X & Y) #
#####################
# Independent variable: the "ads" column reshaped into a single-column
# (n_samples, 1) array, the 2-D layout scikit-learn estimators take for X.
X = df_sales["ads"].to_numpy().reshape(-1, 1)
# Dependent variable: the "sales" column.
y = df_sales["sales"]

#######################
# Model fit & Predict #
#######################
# Create the linear regression and fit it in one step; fit() returns the
# estimator itself, so `model` is the fitted LinearRegression.
model = LinearRegression().fit(X, y)
# Predict sales for the same X values the model was fitted on.
predicted_y_values = model.predict(X)


#######################
#     Mutations       #
#######################
# Build a new DataFrame holding the original data plus a "sales_predicted"
# column. assign() returns a new object, so df_sales is left untouched.
df_sales_predicted = df_sales.assign(sales_predicted=predicted_y_values)
# Show the first rows (rendered as cell output when run in a notebook).
df_sales_predicted.head()


#######################
#     Plots           #
#######################
# Standalone line plot of the predicted sales values (the best fit line).
best_fit_line = df_sales_predicted.plot(
  kind="line",
  x="ads",
  y="sales_predicted",
  color="red",
)
best_fit_line

# Superpose the original data and the best fit line:
# first draw the scatter plot of the observed sales...
sales_plot = df_sales_predicted.plot(
  kind="scatter",
  x="ads",
  y="sales",
  title="Sales per Number of Ads",
)
# ...then draw the predicted line onto the same axes via `ax`.
best_fit_line = df_sales_predicted.plot(
  kind="line",
  x="ads",
  y="sales_predicted",
  color="red",
  ax=sales_plot,
)
sales_plot

#######################
# Manual Predictions  #
#######################
# Apply the fitted line by hand for 100 ads: intercept + slope * 100.
y_100 = model.intercept_ + model.coef_[0] * 100
# Show the formula with the fitted intercept and slope substituted in.
print(f"Model's formula: y = {model.intercept_} + {model.coef_[0]} * 100")
# Report the prediction formatted with two decimals.
print(f"Predicted sales with 100 ads: ${y_100:.2f}")


#################################################
# Helper Functions for slope, y-intercept, etc. #
#################################################
# Print, one per line: the slope (the coef_ array), the y-intercept, and the
# best-fit-line formula built from both.
print(
  f"Model's slope: {model.coef_}",
  f"Model's y-intercept: {model.intercept_}",
  f"Model's formula: y = {model.intercept_} + {model.coef_[0]}X",
  sep="\n",
)


##############################
#  Read in & display data    #
##############################
# Ad counts to predict sales for, as a single-column array (the same layout
# the model was fitted with).
# NOTE(review): X_ads and predicted_sales were referenced here without ever
# being defined, which raised NameError. The ad counts below are an assumed
# sample set -- adjust them to the values you want to forecast.
X_ads = np.array([100, 150, 200, 250, 300]).reshape(-1, 1)
# Predict sales for each ad count with the fitted model.
predicted_sales = model.predict(X_ads)

# Create a DataFrame for the predicted sales; X_ads is flattened back to
# 1-D so each ad count lines up with its prediction.
df_predicted_sales = pd.DataFrame(
  {
    "ads": X_ads.ravel(),
    "predicted_sales": predicted_sales
  }
)

# Display data
df_predicted_sales