In [31]:
from IPython.display import display, HTML
# Inject notebook-wide CSS overrides into the rendered page: wider container
# and cells, larger fonts for code input, outputs, markdown text and tables.
# NOTE(review): the `div.text_cell_render.rendered_html{font-size:18pt;}` rule
# appears twice in a row below; the second occurrence is redundant.
display(HTML("""
<style>
div.container{width:95% !important;}
div.cell.code_cell.rendered{width:92%;}
div.text_cell_render.rendered_html{width:90%;}
div.input_prompt{padding:0px;}
div.CodeMirror {font-family:Consolas; font-size:20pt;}
table.simpletable{font-size:18pt;}
div.text_cell_render.rendered_html{font-size:18pt;}
div.text_cell_render.rendered_html{font-size:18pt;}
div.output {font-size:18pt; font-weight:bold;}
div.input {font-family:Consolas; font-size:18pt;}
div.prompt {min-width:70px;}
div#toc-wrapper{padding-top:120px;}
div.text_cell_render ul li{font-size:18pt;padding:5px;}
table.dataframe{font-size:18px;}
</style>
"""))

- 데이터 소스 : 국토교통부 실거래가 공개시스템 https://rt.molit.go.kr/pt/xls/xls.do

In [2]:
import pandas as pd
import statsmodels.api as sm # 회귀모델
import joblib # pkl이나 joblib로 모델 저장, load

In [5]:
# Load the apartment trade records. The file is CP949-encoded, and anything
# following a '#' on a line is treated as a comment and not parsed.
df = pd.read_csv(
    filepath_or_buffer='../data/trade_apt_api.csv',
    encoding='cp949',
    comment='#',
)
# Print the column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 318 entries, 0 to 317
Data columns (total 13 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   거래금액     318 non-null    int64  
 1   건축년도     318 non-null    int64  
 2   년        318 non-null    int64  
 3   법정동      318 non-null    object 
 4   아파트      318 non-null    object 
 5   월        318 non-null    int64  
 6   일        318 non-null    int64  
 7   전용면적     318 non-null    float64
 8   지번       318 non-null    object 
 9   지역코드     318 non-null    int64  
 10  층        318 non-null    int64  
 11  해제사유발생일  318 non-null    object 
 12  해제여부     318 non-null    object 
dtypes: float64(1), int64(7), object(5)
memory usage: 32.4+ KB


In [6]:
# Peek at a single randomly chosen row (one row is also the default sample size).
df.sample(n=1)

Unnamed: 0,거래금액,건축년도,년,법정동,아파트,월,일,전용면적,지번,지역코드,층,해제사유발생일,해제여부
258,124950,2000,2021,무악동,현대,2,20,84.92,82,11110,10,-,-


In [11]:
# Features: construction year, floor area and floor, plus a constant column of
# ones that serves as the intercept term. `assign` returns a new DataFrame, so
# `df` itself is not modified.
X = df[['건축년도', '전용면적', '층']].assign(const=1)
# Target: the trade price column.
y = df['거래금액']
X.shape, y.shape

((318, 4), (318,))

In [12]:
# Fit an ordinary least squares regression of y on X and show its summary.
# Reading the summary table:
# - R-squared (0.648): how much of the target (dependent) variable X explains.
# - Adj. R-squared (0.644): the adjusted R-squared.
# - Durbin-Watson (1.352): autocorrelation check; values near 2 indicate none.
# - coef: the estimated regression coefficients.
model = sm.OLS(endog=y, exog=X).fit()
model.summary()

0,1,2,3
Dep. Variable:,거래금액,R-squared:,0.648
Model:,OLS,Adj. R-squared:,0.644
Method:,Least Squares,F-statistic:,192.4
Date:,"Wed, 09 Jul 2025",Prob (F-statistic):,8.54e-71
Time:,10:12:14,Log-Likelihood:,-3777.5
No. Observations:,318,AIC:,7563.0
Df Residuals:,314,BIC:,7578.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
건축년도,1925.6916,212.616,9.057,0.000,1507.360,2344.023
전용면적,962.1507,47.367,20.313,0.000,868.955,1055.347
층,2058.1524,417.716,4.927,0.000,1236.276,2880.028
const,-3.855e+06,4.25e+05,-9.069,0.000,-4.69e+06,-3.02e+06

0,1,2,3
Omnibus:,20.985,Durbin-Watson:,1.352
Prob(Omnibus):,0.0,Jarque-Bera (JB):,42.734
Skew:,0.345,Prob(JB):,5.25e-10
Kurtosis:,4.658,Cond. No.,433000.0


In [19]:
# First training example: its feature row and the matching target value.
# (y keeps df's default 0..n-1 index, so position 0 and label 0 coincide.)
X.iloc[0], y.iloc[0]

(건축년도     2002.00
 전용면적       84.82
 층           1.00
 const       1.00
 Name: 0, dtype: float64,
 80000)

In [28]:
# Predict for one row: built 2005, area 84, floor 8, with a trailing 1 for the
# const column. The prediction is scaled by 10,000 and rounded to one decimal
# (presumably converting units of 10,000 won into won -- confirm against the data source).
round(model.predict([[2005, 84, 8, 1]])[0] * 10000, 1)

1031843229.7

In [24]:
# Render the integer with thousands separators.
f'{1031843229:,}'

'1,031,843,229'

In [25]:
# Save the fitted model to disk so it can be reloaded later.
joblib.dump(value=model, filename='../model/ex1_apt_price_regression.joblib')

['../model/ex1_apt_price_regression.joblib']

In [29]:
def predict_apt_price(year, square, floor, model=None,
                      model_path='../model/ex1_apt_price_regression.joblib'):
    """Predict an apartment trade price and return it as a formatted string.

    Parameters
    ----------
    year : int or float
        Construction year (the `건축년도` feature).
    square : int or float
        Floor area (the `전용면적` feature).
    floor : int or float
        Floor (the `층` feature).
    model : object, optional
        An already-loaded model exposing ``predict``. When supplied, nothing is
        read from disk, so repeated calls do not re-load the model file.
    model_path : str, optional
        Path of the joblib file to load when ``model`` is not supplied.

    Returns
    -------
    str
        The prediction multiplied by 10,000, rounded to one decimal place and
        rendered with thousands separators, followed by '원입니다'.
    """
    if model is None:
        # joblib files are pickle-based: only load files from a trusted source.
        model = joblib.load(model_path)
    # Column order must match the training matrix: 건축년도, 전용면적, 층, const.
    input_data = [[year, square, floor, 1]]
    # Scale by 10,000 before formatting (presumably 10,000-won units -> won;
    # confirm against the data source).
    result = round(model.predict(input_data)[0] * 10000, 1)
    return format(result, ',') + '원입니다'

In [30]:
# Collect the three features interactively and show the predicted price.
year = int(input('몇년 건축 ?'))
# Floor area is fractional in the training data (float64 column), so parse it
# as a float; whole-number entries such as "84" are still accepted.
square = float(input('몇 제곱미터?'))
floor  = int(input('몇 층?'))
predict_apt_price(year, square, floor)

몇년 건축 ?2005
몇 제곱미터?84
몇 층?3


'928,935,608.1원입니다'