### P034 使用Numpy实现线性回归

In [3]:
import numpy as np
import pandas as pd

In [4]:
# Toy data set of years vs. salary, written as one (years, salary) record per row.
df = pd.DataFrame(
    [(1, 4000), (2, 4250), (3, 4500), (4, 4750), (5, 5000), (6, 5250)],
    columns=['years', 'salary'],
)
df

Unnamed: 0,years,salary
0,1,4000
1,2,4250
2,3,4500
3,4,4750
4,5,5000
5,6,5250


In [5]:
# Number of samples (rows) in the data set.
m = df.shape[0]
m

6

In [6]:
# Extract the feature column and the target column as NumPy arrays.
X1, Y = df['years'].values, df['salary'].values

In [7]:
# Turn the feature vector into an (m, 1) column so it can be joined with the bias column.
X1 = np.reshape(X1, (m, 1))
X1

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6]], dtype=int64)

In [8]:
# Column of ones; the coefficient fitted for this column is the intercept.
bias = np.ones(shape=(m, 1), dtype=float)
bias

array([[1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.]])

In [9]:
# Design matrix: bias column first, feature column second.
X = np.concatenate((bias, X1), axis=1)
X

array([[1., 1.],
       [1., 2.],
       [1., 3.],
       [1., 4.],
       [1., 5.],
       [1., 6.]])

In [10]:
# X^T X, the matrix that gets inverted in the normal equation.
X.T @ X

array([[ 6., 21.],
       [21., 91.]])

In [11]:
# Inverse of X^T X (first factor of the closed-form solution).
np.linalg.inv(X.T @ X)

array([[ 0.86666667, -0.2       ],
       [-0.2       ,  0.05714286]])

In [12]:
# X^T Y (second factor of the closed-form solution).
X.T @ Y

array([ 27750., 101500.])

In [13]:
# Closed-form least-squares solution (X^T X)^-1 (X^T Y) -> [intercept, slope].
np.linalg.inv(X.T @ X) @ (X.T @ Y)

array([3750.,  250.])

In [14]:
# Least-squares coefficients [intercept, slope] from the normal equations
# (X^T X) b = X^T Y.  The system is handed to np.linalg.solve instead of being
# multiplied by an explicitly formed inverse: the solution is the same, but the
# separate inversion step (and the rounding error it adds) is avoided.
coefs = np.linalg.solve(X.T @ X, X.T @ Y)
print(f'Linear regression: {coefs[0]:.2f} + {coefs[1]:.2f}x')

Linear regression: 3750.00 + 250.00x


### P035 使用Sklearn实现线性回归

In [15]:
from sklearn.linear_model import LinearRegression

In [16]:
# Show the years/salary frame again before fitting the scikit-learn model on it.
df

Unnamed: 0,years,salary
0,1,4000
1,2,4250
2,3,4500
3,4,4750
4,5,5000
5,6,5250


In [17]:
# Linear-regression estimator constructed with default settings (not fitted yet).
model = LinearRegression()

In [18]:
# Both selections use double brackets, so the feature and the target are passed
# as single-column DataFrames (2-D); fit() returns the fitted estimator itself.
model.fit(X=df[['years']], y=df[['salary']])

LinearRegression()

In [20]:
# Fitted intercept; it is an array here because the target was passed as a 2-D frame.
model.intercept_

array([3750.])

In [21]:
# Fitted slope; shape (n_targets, n_features) because the target was passed as a 2-D frame.
model.coef_

array([[250.]])

In [19]:
# intercept_ and coef_ are arrays (the target was fitted as 2-D), so index out the scalars.
f'Linear regression: {model.intercept_[0]:.2f} + {model.coef_[0][0]:.2f}x'

'Linear regression: 3750.00 + 250.00x'

### P036 读取csv实现线性回归

In [22]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

In [32]:
# Load the data set; the path is relative, so 'p036.csv' must be in the working directory.
df = pd.read_csv('p036.csv')
# Preview the first 10 rows.
df.head(10)

Unnamed: 0,variable,target
0,-1.136602,-13.152922
1,-1.418556,-36.316645
2,1.744814,104.18484
3,-0.232182,-15.822773
4,-0.489337,-24.081511
5,0.289094,41.156585
6,1.331587,101.41637
7,-0.529296,-24.057524
8,1.128785,4.933819
9,-0.1746,44.850265


In [26]:
# Feature matrix: the list selector keeps this a one-column DataFrame (2-D).
data = df.loc[:, ['variable']]
data.head(n=3)

Unnamed: 0,variable
0,-1.136602
1,-1.418556
2,1.744814


In [27]:
# Target vector: a scalar label selects the column as a 1-D Series.
target = df.loc[:, 'target']
target.head(n=3)

0    -13.152922
1    -36.316645
2    104.184840
Name: target, dtype: float64

In [28]:
# New unfitted estimator for the CSV data (rebinds the `model` name used in the previous section).
model = LinearRegression()

In [29]:
# Fit on the one-column feature frame and the 1-D target series.
model.fit(X=data, y=target)

LinearRegression()

In [30]:
# Coefficient of determination (R^2), evaluated on the same data the model was fitted on.
model.score(X=data, y=target)

0.5531873717999665