# Linear Regression

In [3]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [11]:
# Load the diabetes dataset as a feature matrix and a target vector.
diabetes = datasets.load_diabetes()
diabetes_x = diabetes.data
diabetes_y = diabetes.target

# Hold out 20% of the samples for testing. random_state pins the shuffle so the
# split, and therefore the metrics printed below, are the same on every run.
diabetes_x_train, diabetes_x_test, diabetes_y_train, diabetes_y_test = train_test_split(
    diabetes_x, diabetes_y, test_size=0.2, random_state=1
)

# Build an ordinary least-squares linear regression model.
model = linear_model.LinearRegression()
# Train the model on the training split only.
model.fit(diabetes_x_train, diabetes_y_train)
# Feed the held-out test data into the model to predict y.
diabetes_y_pred = model.predict(diabetes_x_test)

# Mean squared error between true and predicted targets on the test split.
print(f'Mean squared error: {mean_squared_error(diabetes_y_test, diabetes_y_pred)}')
# R^2 (coefficient of determination) on the test split.
print(f'Variance score: {r2_score(diabetes_y_test, diabetes_y_pred)}')

Mean squared error: 2505.5116783322537
Variance score: 0.4985497321694289


# Polynomial Regression

In [11]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn import preprocessing, linear_model

In [12]:
def f(x):
    """Evaluate the cubic x**3 - 2*x + 3.

    Uses only arithmetic operators, so it accepts plain numbers as well as
    NumPy arrays (evaluated elementwise).
    """
    cubic_term = x**3
    linear_term = x*2
    return cubic_term - linear_term + 3

# Create 100 evenly spaced samples on [0, 10] and shuffle their order.
# The shuffle uses a seeded generator (instead of the unseeded global RNG) so
# the row order, the resulting split, and the printed metrics are reproducible.
rng = np.random.default_rng(1)
X = np.linspace(0, 10, 100)
rng.shuffle(X)  # in-place shuffle of the 1-D array
X = X.reshape(100, 1)  # single-feature column, as the sklearn estimators expect
y = f(X)

# Expand the single feature into polynomial terms up to degree 2.
# NOTE(review): f is cubic while degree=2 is used here, so the model cannot
# represent the x**3 term exactly -- confirm this underfit is intended.
poly = PolynomialFeatures(degree=2).fit(X)
X_poly = poly.transform(X)

# Train-test split: hold out 30% of the samples for testing.
x_train, x_test, y_train, y_test = train_test_split(X_poly, y, test_size=0.3, random_state=1)

# Standardize using statistics computed on the training split only.
scaler = preprocessing.StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)

# Build and fit the model on the standardized training data.
model = linear_model.LinearRegression()
model.fit(x_train, y_train)

# Apply the training-split scaler to the test data (no refit on test data).
x_test = scaler.transform(x_test)

# Predict y for the held-out samples.
y_pred = model.predict(x_test)

print(f'Mean squared error: {mean_squared_error(y_test, y_pred)}')
print(f'Variance score: {r2_score(y_test, y_pred)}')

Mean squared error: 351.47955280298993
Variance score: 0.9951679420996745


# Logistic Regression

In [5]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn import datasets, preprocessing
from sklearn.model_selection import train_test_split

In [8]:
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, confusion_matrix

# Load the iris dataset: feature matrix and class labels.
iris = datasets.load_iris()
x, y = iris.data, iris.target

# Train-test split: hold out 30% of the samples, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=1
)

# Standardize both splits using statistics learned from the training split only.
scaler = preprocessing.StandardScaler()
scaler.fit(x_train)
x_train, x_test = scaler.transform(x_train), scaler.transform(x_test)

# Build the classifier and fit it on the standardized training data.
model = LogisticRegression()
model.fit(x_train, y_train)

# Predict class labels for the held-out samples.
y_pred = model.predict(x_test)

# Accuracy of the predictions against the true test labels.
# (The model can also compute this directly: model.score(x_test, y_test).)
accuracy = accuracy_score(y_test, y_pred)

print(f'Accuracy: {accuracy}')

Accuracy: 0.9555555555555556
