In [1]:
# %load ../standard_import.txt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
import seaborn as sns

from sklearn.preprocessing import scale
import sklearn.linear_model as skl_lm
from sklearn.metrics import mean_squared_error, r2_score
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.linear_model import LinearRegression

%matplotlib inline
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old aliases, so prefer the new name when this
# Matplotlib ships it and fall back to the legacy name otherwise.
plt.style.use(
    'seaborn-v0_8-white' if 'seaborn-v0_8-white' in plt.style.available else 'seaborn-white'
)

In [2]:
# Read the advertising data, keeping only the CSV columns at positions 1-4
# (position 0 is presumably a saved row-index column — TODO confirm).
advertising = pd.read_csv('Advertising.csv', usecols=[1,2,3,4])
# Summarize column names, non-null counts and dtypes of the loaded frame.
advertising.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   TV         200 non-null    float64
 1   radio      200 non-null    float64
 2   newspaper  200 non-null    float64
 3   sales      200 non-null    float64
dtypes: float64(4)
memory usage: 6.4 KB


In [3]:
# Print the loaded frame as plain text (pandas abbreviates the middle rows).
print(advertising)

        TV  radio  newspaper  sales
0    230.1   37.8       69.2   22.1
1     44.5   39.3       45.1   10.4
2     17.2   45.9       69.3    9.3
3    151.5   41.3       58.5   18.5
4    180.8   10.8       58.4   12.9
..     ...    ...        ...    ...
195   38.2    3.7       13.8    7.6
196   94.2    4.9        8.1    9.7
197  177.0    9.3        6.4   12.8
198  283.6   42.0       66.2   25.5
199  232.1    8.6        8.7   13.4

[200 rows x 4 columns]


In [4]:
# Pull the two predictors out of the frame as Series and print them together.
x_1, x_2 = advertising["TV"], advertising["radio"]
print(x_1, x_2)

0      230.1
1       44.5
2       17.2
3      151.5
4      180.8
       ...  
195     38.2
196     94.2
197    177.0
198    283.6
199    232.1
Name: TV, Length: 200, dtype: float64 0      37.8
1      39.3
2      45.9
3      41.3
4      10.8
       ... 
195     3.7
196     4.9
197     9.3
198    42.0
199     8.6
Name: radio, Length: 200, dtype: float64


In [5]:
# Store the TV x radio interaction term as a new column on the frame.
advertising["TV*radio"] = advertising.TV.mul(advertising.radio)

In [6]:
# Print the frame again to check that the TV*radio column was added.
print(advertising)

        TV  radio  newspaper  sales  TV*radio
0    230.1   37.8       69.2   22.1   8697.78
1     44.5   39.3       45.1   10.4   1748.85
2     17.2   45.9       69.3    9.3    789.48
3    151.5   41.3       58.5   18.5   6256.95
4    180.8   10.8       58.4   12.9   1952.64
..     ...    ...        ...    ...       ...
195   38.2    3.7       13.8    7.6    141.34
196   94.2    4.9        8.1    9.7    461.58
197  177.0    9.3        6.4   12.8   1646.10
198  283.6   42.0       66.2   25.5  11911.20
199  232.1    8.6        8.7   13.4   1996.06

[200 rows x 5 columns]


In [7]:
# Predictor matrix: both main effects plus their interaction column.
x = advertising.loc[:, ["TV", "radio", "TV*radio"]]

In [8]:
# Inspect the three-column predictor frame.
print(x)

        TV  radio  TV*radio
0    230.1   37.8   8697.78
1     44.5   39.3   1748.85
2     17.2   45.9    789.48
3    151.5   41.3   6256.95
4    180.8   10.8   1952.64
..     ...    ...       ...
195   38.2    3.7    141.34
196   94.2    4.9    461.58
197  177.0    9.3   1646.10
198  283.6   42.0  11911.20
199  232.1    8.6   1996.06

[200 rows x 3 columns]


In [9]:
# Element-wise product of the TV and radio Series (the interaction term).
x_3 = x_1.mul(x_2)
print(x_3)

0       8697.78
1       1748.85
2        789.48
3       6256.95
4       1952.64
         ...   
195      141.34
196      461.58
197     1646.10
198    11911.20
199     1996.06
Length: 200, dtype: float64


In [10]:
# Check which container type the interaction term is held in.
type(x_3)

pandas.core.series.Series

In [11]:
# Assemble the predictor matrix from TV, radio and their interaction term.
# x_3 is an unnamed Series, so a plain concat would label its column with the
# integer 0. A frame whose column names mix str and int is rejected by recent
# scikit-learn releases when fitting, so give the interaction column an
# explicit string name (the same label used for the column on `advertising`).
x = pd.concat([x_1, x_2, x_3.rename("TV*radio")], axis=1)
print(x)

        TV  radio         0
0    230.1   37.8   8697.78
1     44.5   39.3   1748.85
2     17.2   45.9    789.48
3    151.5   41.3   6256.95
4    180.8   10.8   1952.64
..     ...    ...       ...
195   38.2    3.7    141.34
196   94.2    4.9    461.58
197  177.0    9.3   1646.10
198  283.6   42.0  11911.20
199  232.1    8.6   1996.06

[200 rows x 3 columns]


In [12]:
# Response variable: the sales column of the frame.
y = advertising["sales"]
print(y)

0      22.1
1      10.4
2       9.3
3      18.5
4      12.9
       ... 
195     7.6
196     9.7
197    12.8
198    25.5
199    13.4
Name: sales, Length: 200, dtype: float64


In [13]:
# Fit ordinary least squares through the formula interface. In the formula
# language `TV*radio` already expands to both main effects plus the TV:radio
# interaction, so the explicit `TV + radio` terms are redundant but harmless.
est = smf.ols(
    formula='sales ~ TV + radio + TV*radio',
    data=advertising,
).fit()
# Display only the coefficient table of the regression summary.
est.summary().tables[1]

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,6.7502,0.248,27.233,0.000,6.261,7.239
TV,0.0191,0.002,12.699,0.000,0.016,0.022
radio,0.0289,0.009,3.241,0.001,0.011,0.046
TV:radio,0.0011,5.24e-05,20.727,0.000,0.001,0.001


In [14]:
# Fit the same linear model with scikit-learn on the predictor matrix built above.
reg = LinearRegression().fit(X=x, y=y)

In [15]:
# Coefficient of determination (R^2), evaluated on the same data used for fitting.
reg.score(x, y)

0.9677905498482523

In [16]:
# Fitted slopes, in the column order of x (TV, radio, interaction term).
reg.coef_

array([0.01910107, 0.02886034, 0.00108649])

In [17]:
# Fitted intercept term of the scikit-learn model.
reg.intercept_

6.750220203075117

### The intercept and the coefficients of TV, radio and TV*radio are the same as those calculated by statsmodels (`smf.ols`)

In [18]:
pip install notebook-as-pdf

Collecting notebook-as-pdf
  Downloading notebook_as_pdf-0.5.0-py3-none-any.whl (6.5 kB)
Collecting PyPDF2
  Downloading PyPDF2-1.26.0.tar.gz (77 kB)
[K     |████████████████████████████████| 77 kB 2.5 MB/s eta 0:00:01
[?25hCollecting pyppeteer
  Downloading pyppeteer-1.0.2-py3-none-any.whl (83 kB)
[K     |████████████████████████████████| 83 kB 6.1 MB/s  eta 0:00:01
Collecting certifi>=2021
  Downloading certifi-2021.10.8-py2.py3-none-any.whl (149 kB)
[K     |████████████████████████████████| 149 kB 14.0 MB/s eta 0:00:01
[?25hCollecting websockets<11.0,>=10.0
  Downloading websockets-10.1-cp38-cp38-macosx_10_9_x86_64.whl (96 kB)
[K     |████████████████████████████████| 96 kB 10.4 MB/s eta 0:00:01
[?25hCollecting pyee<9.0.0,>=8.1.0
  Downloading pyee-8.2.2-py2.py3-none-any.whl (12 kB)
Building wheels for collected packages: PyPDF2
  Building wheel for PyPDF2 (setup.py) ... [?25ldone
[?25h  Created wheel for PyPDF2: filename=PyPDF2-1.26.0-py3-none-any.whl size=61085 sha256=db8

In [20]:
# Export a notebook to PDF through the exporter added by notebook-as-pdf.
# NOTE(review): the captured output is nbconvert's usage/help text rather than
# a conversion log, which suggests no notebook was converted; 'example.ipynb'
# looks like a placeholder name — confirm the intended notebook filename.
!jupyter-nbconvert --to pdfviahtml example.ipynb

This application is used to convert notebook files (*.ipynb) to various other
formats.


Options
The options below are convenience aliases to configurable class-options,
as listed in the "Equivalent to" description-line of the aliases.
To see all configurable class-options for some <cmd>, use:
    <cmd> --help-all

--debug
    set log level to logging.DEBUG (maximize logging output)
    Equivalent to: [--Application.log_level=10]
--generate-config
    generate default config file
    Equivalent to: [--JupyterApp.generate_config=True]
-y
    Answer yes to any questions instead of prompting.
    Equivalent to: [--JupyterApp.answer_yes=True]
--execute
    Execute the notebook prior to export.
    Equivalent to: [--ExecutePreprocessor.enabled=True]
--allow-errors
    Continue notebook execution even if one of the cells throws an error and include the error message in the cell output (the default behaviour is to abort conversion). This flag is only relevant if '--exec