In [1]:
import pandas as pd      
import numpy as np 
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import pickle

In [2]:
def eval_metrics(actual, pred):
    """Print regression quality metrics for a set of predictions.

    Parameters
    ----------
    actual : array-like
        Ground-truth target values.
    pred : array-like
        Predicted values, aligned element-wise with ``actual``.

    Returns
    -------
    None
        The R2 score, MAE, MSE and RMSE are written to stdout only.
    """
    # Compute the MSE once and derive the RMSE from it instead of calling
    # mean_squared_error twice on the same inputs.
    mse = mean_squared_error(actual, pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, pred)
    score = r2_score(actual, pred)
    # The argument layout (leading space, "\n" items) reproduces the original
    # report text exactly: one metric per line, each line prefixed by a space.
    print(" r2_score:", score, "\n", "mae:", mae, "\n", "mse:", mse, "\n", "rmse:", rmse)

In [3]:
# Load the advertising dataset from the working directory and preview the first rows.
df = pd.read_csv("Advertising.csv")
df.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [4]:
# Summarise column dtypes and non-null counts to check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   TV         200 non-null    float64
 1   radio      200 non-null    float64
 2   newspaper  200 non-null    float64
 3   sales      200 non-null    float64
dtypes: float64(4)
memory usage: 6.4 KB


In [5]:
# Separate the predictors (every column except the target) from the target.
X = df.drop("sales", axis=1)
y = df["sales"]

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

# Persist the fitted model. The context manager closes (and flushes) the file
# even if pickling raises; a bare open() call would leave the handle to the
# garbage collector.
filename = 'my_model'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column.
df.describe()

Unnamed: 0,TV,radio,newspaper,sales
count,200.0,200.0,200.0,200.0
mean,147.0425,23.264,30.554,14.0225
std,85.854236,14.846809,21.778621,5.217457
min,0.7,0.0,0.3,1.6
25%,74.375,9.975,12.75,10.375
50%,149.75,22.9,25.75,12.9
75%,218.825,36.525,45.1,17.4
max,296.4,49.6,114.0,27.0


In [13]:
# Predict on the held-out split, then print the error metrics for those predictions.
y_pred = model.predict(X_test)
eval_metrics(actual=y_test, pred=y_pred)

 r2_score: 0.8609466508230368 
 mae: 1.5116692224549084 
 mse: 3.7967972367152187 
 rmse: 1.9485372043446383


**Deployment** — reload the pickled model and use it to predict sales for a new input row.

In [14]:
# Reload the model written by the training cell. The context manager closes the
# file handle as soon as unpickling finishes instead of leaking it.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load model files that you created yourself.
with open('my_model', 'rb') as model_file:
    final_model = pickle.load(model_file)

In [15]:
# Feature names in the order used for training, as a plain Python list.
columns = X.columns.tolist()
columns

['TV', 'radio', 'newspaper']

In [16]:
# One sample input record: a value for each feature column the model was trained on.
my_dict = dict(TV=150, radio=25, newspaper=30)

In [17]:
# Wrap the single record in a list so it becomes one row. The plain DataFrame
# constructor is the supported way to build a frame from a list of records;
# DataFrame.from_dict is documented for dict input only.
# NOTE: this rebinds `df`, which previously held the data loaded from
# Advertising.csv; re-run the loading cell before re-running the training cell.
df = pd.DataFrame([my_dict])

In [18]:
# Display the single-row input frame to confirm its columns and values.
df

Unnamed: 0,TV,radio,newspaper
0,150,25,30


In [19]:
# Score the single-row input frame with the reloaded model; the result is an
# array with one predicted value for that row.
prediction = final_model.predict(df)
print(prediction)

[14.50650223]


In [20]:
# int() truncates toward zero, so the fractional part of the prediction is dropped
# (not rounded) before it is reported.
estimated_sales = int(prediction[0])
print("The estimated value of sales is {}. ".format(estimated_sales))

The estimated value of sales is 14. 


In [2]:
conda env list

# conda environments:
#
base                     /Users/kadiryildirim/opt/anaconda3
eu13_test2            *  /Users/kadiryildirim/opt/anaconda3/envs/eu13_test2


Note: you may need to restart the kernel to use updated packages.


In [3]:
pip list

Package               Version
--------------------- -----------
altair                4.2.2
anyio                 3.5.0
argon2-cffi           21.3.0
argon2-cffi-bindings  21.2.0
asttokens             2.0.5
attrs                 22.1.0
backcall              0.2.0
beautifulsoup4        4.12.2
bleach                4.1.0
blinker               1.6.2
cachetools            5.3.0
certifi               2022.12.7
cffi                  1.15.1
charset-normalizer    3.1.0
click                 8.1.3
colorama              0.4.6
comm                  0.1.2
debugpy               1.5.1
decorator             5.1.1
defusedxml            0.7.1
entrypoints           0.4
executing             0.8.3
fastjsonschema        2.16.2
gitdb                 4.0.10
GitPython             3.1.31
idna                  3.4
importlib-metadata    6.0.0
ipykernel             6.19.2
ipython               8.12.0
ipython-genutils      0.2.0
jedi                  0.18.1
Jinja2                3.1.2
joblib                1.2.0
j

In [3]:
pip install -r requirements.txt

Collecting scikit-learn==1.0.2
  Downloading scikit_learn-1.0.2-cp39-cp39-macosx_10_13_x86_64.whl (8.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.0/8.0 MB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting streamlit==1.10.0
  Downloading streamlit-1.10.0-py2.py3-none-any.whl (9.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting scipy>=1.1.0
  Downloading scipy-1.10.1-cp39-cp39-macosx_10_9_x86_64.whl (35.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.2/35.2 MB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting numpy>=1.14.6
  Downloading numpy-1.24.3-cp39-cp39-macosx_10_9_x86_64.whl (19.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.8/19.8 MB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting joblib>=0.11
  Downloading jobl

Note: you may need to restart the kernel to use updated packages.


In [5]:
pip list

Package               Version
--------------------- -----------
altair                4.2.2
anyio                 3.5.0
argon2-cffi           21.3.0
argon2-cffi-bindings  21.2.0
asttokens             2.0.5
attrs                 22.1.0
backcall              0.2.0
beautifulsoup4        4.12.2
bleach                4.1.0
blinker               1.6.2
cachetools            5.3.0
certifi               2022.12.7
cffi                  1.15.1
charset-normalizer    3.1.0
click                 8.1.3
colorama              0.4.6
comm                  0.1.2
debugpy               1.5.1
decorator             5.1.1
defusedxml            0.7.1
entrypoints           0.4
executing             0.8.3
fastjsonschema        2.16.2
gitdb                 4.0.10
GitPython             3.1.31
idna                  3.4
importlib-metadata    6.0.0
ipykernel             6.19.2
ipython               8.12.0
ipython-genutils      0.2.0
jedi                  0.18.1
Jinja2                3.1.2
joblib                1.2.0
j

In [6]:
# Run `streamlit run my_app.py` as a shell command from the notebook.
# NOTE(review): the only recorded output is ^C, which suggests the command was
# interrupted manually; presumably it keeps the cell busy until stopped, so
# consider launching it from a terminal instead.
!streamlit run my_app.py

^C
