### P034 使用Numpy实现线性回归

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Toy dataset: years of experience versus salary (salary rises by 250 per year).
df = pd.DataFrame(
    dict(
        years=[1, 2, 3, 4, 5, 6],
        salary=[4000, 4250, 4500, 4750, 5000, 5250],
    )
)
df

Unnamed: 0,years,salary
0,1,4000
1,2,4250
2,3,4500
3,4,4750
4,5,5000
5,6,5250


In [3]:
# Number of samples, i.e. the number of rows in the frame.
m = df.shape[0]
m

6

In [4]:
# Pull the single feature column out of the frame as a 1-D NumPy array.
X1 = df["years"].to_numpy()
X1

array([1, 2, 3, 4, 5, 6], dtype=int64)

In [5]:
# Design matrix: a leading column of ones (for the intercept term)
# placed next to the feature column, giving shape (m, 2).
X = np.column_stack((np.ones(m), X1))
X

array([[1., 1.],
       [1., 2.],
       [1., 3.],
       [1., 4.],
       [1., 5.],
       [1., 6.]])

In [7]:
# Target column; kept as a pandas Series (no conversion to a NumPy array here).
y = df["salary"]

In [8]:
# Right-hand side of the normal equations: X^T y.
X.T.dot(y)

array([ 27750., 101500.])

In [9]:
# Ordinary least squares via the normal equations: (X^T X) b = X^T y.
# Solve the linear system directly instead of forming inv(X^T X) and then
# multiplying: it does less work and is less prone to floating-point
# round-off than invert-then-multiply.
# coefs[0] is the intercept (column of ones), coefs[1] the slope for `years`.
coefs = np.linalg.solve(np.dot(X.T, X), np.dot(X.T, y))

In [10]:
# Show the fitted coefficients: [intercept, slope].
coefs

array([3750.,  250.])

In [11]:
# Render the fitted line as a readable equation: intercept first, then slope.
"y = {} + {}x".format(coefs[0], coefs[1])

'y = 3749.9999999999964 + 250.0x'

### P035 使用Sklearn实现线性回归

In [12]:
from sklearn.linear_model import LinearRegression

In [13]:
# Re-display the years/salary frame that the scikit-learn model is fitted on.
df

Unnamed: 0,years,salary
0,1,4000
1,2,4250
2,3,4500
3,4,4750
4,5,5000
5,6,5250


In [14]:
# Unfitted linear-regression estimator with default settings.
model = LinearRegression()

In [15]:
# Fit salary on years. Both arguments are 2-D (double-bracket selection);
# the 2-D target is why intercept_ and coef_ come back as arrays below.
model.fit(X=df[["years"]], y=df[["salary"]])

LinearRegression()

In [16]:
# Fitted intercept; a one-element array here because the target was passed as a 2-D frame.
model.intercept_

array([3750.])

In [17]:
# Fitted slope(s); a 1x1 array here (one target column, one feature).
model.coef_

array([[250.]])

In [18]:
# Render the fitted line as an equation, unwrapping the array-valued attributes.
"y = {} + {}x".format(model.intercept_[0], model.coef_[0][0])

'y = 3750.0 + 250.0x'

### P036 读取csv实现线性回归

In [19]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

In [20]:
# Load the dataset from a CSV file located next to the notebook.
# NOTE: this rebinds `df`, replacing the years/salary frame used in the earlier sections.
df = pd.read_csv("./p036.csv")

In [22]:
# Preview the first three rows to check the column names and values.
df.head(3)

Unnamed: 0,variable,target
0,-1.136602,-13.152922
1,-1.418556,-36.316645
2,1.744814,104.18484


In [23]:
# Fresh, unfitted estimator; rebinds `model`, discarding the one fitted earlier.
model = LinearRegression()

In [24]:
# Fit target on variable: 2-D feature frame, 1-D target Series.
model.fit(X=df[["variable"]], y=df["target"])

LinearRegression()

In [25]:
# Score the model on the same data it was fitted on
# (for a regressor, score() reports the coefficient of determination R^2).
model.score(X=df[["variable"]], y=df["target"])

0.5531873717999665