In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Load the fish measurements; the path is relative to the notebook's working directory.
df = pd.read_csv('../data/Fish.csv')

# Use the 'Length2' column as the only feature (double brackets keep it a
# one-column DataFrame) and 'Weight' as the regression target.
data = df[['Length2']]
target = df['Weight']

# Hold out 50 rows for testing (an integer test_size is an absolute row count)
# and fix the shuffle with random_state so the split is reproducible.
train_input, test_input, train_target, test_target = train_test_split(
    data,
    target,
    test_size=50,
    random_state=42,
)


def _quadratic_features(lengths):
    """Return a two-column array [length ** 2, length] for a one-column input."""
    return np.column_stack((lengths ** 2, lengths))


# Expand each split into quadratic features: squared term first, linear term second.
train_poly = _quadratic_features(train_input)
test_poly = _quadratic_features(test_input)

# Show the first training rows before and after the expansion.
print(train_input.head())
print(train_poly[:5])

     Length2
112     36.5
11      31.0
109     32.8
69      23.0
127     44.0
[[1332.25   36.5 ]
 [ 961.     31.  ]
 [1075.84   32.8 ]
 [ 529.     23.  ]
 [1936.     44.  ]]


In [None]:
from sklearn.linear_model import LinearRegression

# Ordinary least squares on the [length ** 2, length] features; because the
# inputs already contain the squared term, the fitted model is a quadratic in length.
lr = LinearRegression()
lr.fit(X=train_poly, y=train_target)

In [None]:
# Score of the fitted model on the data it was trained on.
train_r2 = lr.score(X=train_poly, y=train_target)
print(train_r2)

In [None]:
import matplotlib.pyplot as plt

# Compare the fitted quadratic curve against the training and test samples.
plt.figure(figsize=(8, 4))
plt.title('Predicting Weight by Length')

# Plot the observed data points, one legend entry per split.
plt.scatter(train_input, train_target, color='blue', alpha=0.5, label='Train Data')
# Fix: the test points were previously also labelled 'Train Data', which made
# the legend show two identical entries and misidentified the green points.
plt.scatter(test_input, test_target, color='green', alpha=0.5, label='Test Data')

# Evaluate the fitted polynomial on integer lengths 5..64. coef_[0] multiplies
# the squared term and coef_[1] the linear term, matching the column order of
# the features the model was fitted on.
point = np.arange(5, 65)
plt.plot(point, lr.coef_[0]*point**2 + lr.coef_[1]*point + lr.intercept_,
         color='red', linewidth=3, label='Polynomial Regression Curve')

plt.xlabel('Length')
plt.ylabel('Weight')
plt.legend()
plt.grid(True, linestyle='--', alpha=0.25)
plt.show()

# Check model performance (coefficient of determination, R^2) on both splits.
print(f"학습 세트 점수: {lr.score(train_poly, train_target):.4f}")
print(f"테스트 세트 점수: {lr.score(test_poly, test_target):.4f}")