# 거래량과 종가

In [33]:
import yfinance as yf

# Samsung Electronics; the '.KS' suffix marks a Korea Exchange (KRX) listing.
ticker = '005930.KS'
stock = yf.Ticker(ticker)

# Download price history for an explicit start/end date window
# and preview the first rows of the result.
df = stock.history(end="2024-06-01", start="2024-01-01")
print(df.head(n=5))

# Rebind df to the most recent month of history.
# NOTE: from this point on df no longer holds the 2024 window printed above.
df = stock.history(period="1mo")

                                   Open          High           Low  \
Date                                                                  
2024-01-02 00:00:00+09:00  76094.115323  77651.028168  76094.115323   
2024-01-03 00:00:00+09:00  76386.043425  76677.964610  74926.437500   
2024-01-04 00:00:00+09:00  74050.667857  75218.352501  74050.667857   
2024-01-05 00:00:00+09:00  74634.510179  75023.738393  74342.589018   
2024-01-08 00:00:00+09:00  74926.441585  75412.976920  74342.599183   

                                  Close    Volume  Dividends  Stock Splits  
Date                                                                        
2024-01-02 00:00:00+09:00  77456.414062  17142847        0.0           0.0  
2024-01-03 00:00:00+09:00  74926.437500  21753644        0.0           0.0  
2024-01-04 00:00:00+09:00  74537.203125  15324439        0.0           0.0  
2024-01-05 00:00:00+09:00  74537.203125  11304316        0.0           0.0  
2024-01-08 00:00:00+09:00  74439.906250 

In [34]:
# Add the previous row's volume as a lag-1 feature, then drop every row that
# contains a NaN (the first row has no previous volume to look back on).
df = df.assign(Volume_lag1=df['Volume'].shift(1)).dropna()

# Preview the close price next to the current and lagged volume.
preview_cols = ['Close', 'Volume', 'Volume_lag1']
print(df[preview_cols].head())

                             Close    Volume  Volume_lag1
Date                                                     
2025-05-12 00:00:00+09:00  57600.0  15414702    7814322.0
2025-05-13 00:00:00+09:00  56900.0  16842801   15414702.0
2025-05-14 00:00:00+09:00  57400.0  12468089   16842801.0
2025-05-15 00:00:00+09:00  57300.0  13139736   12468089.0
2025-05-16 00:00:00+09:00  56800.0  10385352   13139736.0


# 범주형 변수와 교호작용

In [35]:
import pandas as pd

# Build the toy car data set one record per row:
# (engine size, fuel type, price).
car_rows = [
    (1.6, 'Gasoline', 18000),
    (2.0, 'Diesel', 22000),
    (3.0, 'Gasoline', 27000),
    (1.4, 'Diesel', 16000),
    (2.2, 'Gasoline', 24000),
    (2.5, 'Diesel', 25000),
    (3.5, 'Gasoline', 32000),
    (1.2, 'Diesel', 15000),
]
data = pd.DataFrame(car_rows, columns=['EngineSize', 'FuelType', 'Price'])

In [36]:
# One-hot encode the categorical 'FuelType' column; drop_first=True drops the
# first level, leaving the single 'FuelType_Gasoline' indicator column.
data_encoded = pd.get_dummies(data, drop_first=True, columns=['FuelType'])

# Interaction term: engine size multiplied by the gasoline indicator.
engine_x_gasoline = data_encoded['EngineSize'].mul(data_encoded['FuelType_Gasoline'])
data_encoded = data_encoded.assign(Interaction=engine_x_gasoline)
print(data_encoded)

   EngineSize  Price  FuelType_Gasoline  Interaction
0         1.6  18000               True          1.6
1         2.0  22000              False          0.0
2         3.0  27000               True          3.0
3         1.4  16000              False          0.0
4         2.2  24000               True          2.2
5         2.5  25000              False          0.0
6         3.5  32000               True          3.5
7         1.2  15000              False          0.0


In [37]:
# Fit an OLS regression of Price on engine size, the gasoline indicator
# and their interaction term.
from statsmodels.formula.api import ols
price_formula = 'Price~EngineSize+FuelType_Gasoline+Interaction'
model = ols(data=data_encoded, formula=price_formula).fit()

# Print the full regression summary table.
fit_report = model.summary()
print(fit_report)

                            OLS Regression Results                            
Dep. Variable:                  Price   R-squared:                       0.981
Model:                            OLS   Adj. R-squared:                  0.967
Method:                 Least Squares   F-statistic:                     70.43
Date:                Mon, 09 Jun 2025   Prob (F-statistic):           0.000643
Time:                        03:42:37   Log-Likelihood:                -64.240
No. Observations:                   8   AIC:                             136.5
Df Residuals:                       4   BIC:                             136.8
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------
Intercept             

  return hypotest_fun_in(*args, **kwds)


In [38]:
from sklearn.linear_model import LinearRegression
# Predictors (X) and response (y) taken from the encoded frame.
feature_names = ['EngineSize', 'FuelType_Gasoline', 'Interaction']
X = data_encoded[feature_names]
y = data_encoded['Price']

# Construct the estimator and fit it in one step; fit() returns the estimator.
reg = LinearRegression().fit(X, y)

# Print the intercept, then the coefficients (one per column of X).
print("절편:", reg.intercept_)
print("회귀계수:", reg.coef_)

절편: 5181.384248210023
회귀계수: [ 8066.82577566  2488.41598681 -1239.563731  ]
