In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression, ElasticNet, Lasso, Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import  LabelEncoder
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error
import xgboost as xgb
import re

In [2]:
# Load the raw lease-transaction export into a DataFrame.
train_df = pd.read_csv(filepath_or_buffer='apt_200101_200131.csv')

In [3]:
# Preview the first five rows of the freshly loaded frame.
train_df.head(n=5)

Unnamed: 0,시군구,번지,본번,부번,단지명,전월세구분,전용면적(㎡),계약년월,계약일,보증금(만원),월세(만원),층,건축년도,도로명,계약기간,계약구분,갱신요구권 사용,종전계약 보증금 (만원),종전계약 월세 (만원)
0,서울특별시 강남구 개포동,658-1,658.0,1.0,개포6차우성아파트1동~8동,전세,79.97,202001,3,43000,0,1,1987,언주로 3,-,-,-,,
1,서울특별시 강남구 개포동,658-1,658.0,1.0,개포6차우성아파트1동~8동,전세,79.97,202001,17,45000,0,5,1987,언주로 3,-,-,-,,
2,서울특별시 강남구 개포동,658-1,658.0,1.0,개포6차우성아파트1동~8동,전세,79.97,202001,18,44000,0,4,1987,언주로 3,-,-,-,,
3,서울특별시 강남구 개포동,652,652.0,0.0,개포우성3차,전세,133.46,202001,15,90000,0,11,1984,개포로 307,-,-,-,,
4,서울특별시 강남구 개포동,652,652.0,0.0,개포우성3차,전세,133.46,202001,18,85000,0,11,1984,개포로 307,-,-,-,,


In [4]:
# Drop the city prefix from '시군구' to get "<gu> <dong>".
# regex=False makes the literal match explicit (the default of `regex`
# differs between pandas versions), and .str.strip() removes the space
# that is left behind where the prefix used to be.
train_df['동'] = (
    train_df['시군구']
    .str.replace('서울특별시', '', regex=False)
    .str.strip()
)

In [5]:
# Inspect the new '동' column after removing the city prefix.
train_df.head(n=5)

Unnamed: 0,시군구,번지,본번,부번,단지명,전월세구분,전용면적(㎡),계약년월,계약일,보증금(만원),월세(만원),층,건축년도,도로명,계약기간,계약구분,갱신요구권 사용,종전계약 보증금 (만원),종전계약 월세 (만원),동
0,서울특별시 강남구 개포동,658-1,658.0,1.0,개포6차우성아파트1동~8동,전세,79.97,202001,3,43000,0,1,1987,언주로 3,-,-,-,,,강남구 개포동
1,서울특별시 강남구 개포동,658-1,658.0,1.0,개포6차우성아파트1동~8동,전세,79.97,202001,17,45000,0,5,1987,언주로 3,-,-,-,,,강남구 개포동
2,서울특별시 강남구 개포동,658-1,658.0,1.0,개포6차우성아파트1동~8동,전세,79.97,202001,18,44000,0,4,1987,언주로 3,-,-,-,,,강남구 개포동
3,서울특별시 강남구 개포동,652,652.0,0.0,개포우성3차,전세,133.46,202001,15,90000,0,11,1984,개포로 307,-,-,-,,,강남구 개포동
4,서울특별시 강남구 개포동,652,652.0,0.0,개포우성3차,전세,133.46,202001,18,85000,0,11,1984,개포로 307,-,-,-,,,강남구 개포동


In [6]:
# Tokens to strip from '시군구': the city name plus every district (gu) name,
# so that only the neighbourhood (dong) part remains.
remove_gu = ['서울특별시', '강남구', '강동구', '강북구', '강서구', '관악구', '광진구','구로구', '금천구', '노원구', '도봉구', '동대문구', '동작구', '마포구', '서대문구', '서초구', '성동구', '성북구','송파구' , '양천구', '영등포구', '용산구', '은평구', '종로구', '중구', '중랑구']

# Build a single alternation pattern; re.escape keeps every name a literal
# match even if an entry ever contains a regex metacharacter.
gu_pattern = '|'.join(re.escape(name) for name in remove_gu)

# Removing the tokens leaves the separating spaces behind
# (e.g. '  개포동'), so strip them to get a clean dong name.
train_df['동'] = (
    train_df['시군구']
    .str.replace(gu_pattern, '', regex=True)
    .str.strip()
)

In [7]:
# Check that '동' now holds only the neighbourhood name.
train_df.head(n=5)

Unnamed: 0,시군구,번지,본번,부번,단지명,전월세구분,전용면적(㎡),계약년월,계약일,보증금(만원),월세(만원),층,건축년도,도로명,계약기간,계약구분,갱신요구권 사용,종전계약 보증금 (만원),종전계약 월세 (만원),동
0,서울특별시 강남구 개포동,658-1,658.0,1.0,개포6차우성아파트1동~8동,전세,79.97,202001,3,43000,0,1,1987,언주로 3,-,-,-,,,개포동
1,서울특별시 강남구 개포동,658-1,658.0,1.0,개포6차우성아파트1동~8동,전세,79.97,202001,17,45000,0,5,1987,언주로 3,-,-,-,,,개포동
2,서울특별시 강남구 개포동,658-1,658.0,1.0,개포6차우성아파트1동~8동,전세,79.97,202001,18,44000,0,4,1987,언주로 3,-,-,-,,,개포동
3,서울특별시 강남구 개포동,652,652.0,0.0,개포우성3차,전세,133.46,202001,15,90000,0,11,1984,개포로 307,-,-,-,,,개포동
4,서울특별시 강남구 개포동,652,652.0,0.0,개포우성3차,전세,133.46,202001,18,85000,0,11,1984,개포로 307,-,-,-,,,개포동


In [8]:
# Keep only the rows whose lease type ('전월세구분') equals '전세'.
train_df = train_df[train_df['전월세구분'].eq('전세')]

In [9]:
# Preview the frame after filtering to '전세' rows.
train_df.head(n=5)

Unnamed: 0,시군구,번지,본번,부번,단지명,전월세구분,전용면적(㎡),계약년월,계약일,보증금(만원),월세(만원),층,건축년도,도로명,계약기간,계약구분,갱신요구권 사용,종전계약 보증금 (만원),종전계약 월세 (만원),동
0,서울특별시 강남구 개포동,658-1,658.0,1.0,개포6차우성아파트1동~8동,전세,79.97,202001,3,43000,0,1,1987,언주로 3,-,-,-,,,개포동
1,서울특별시 강남구 개포동,658-1,658.0,1.0,개포6차우성아파트1동~8동,전세,79.97,202001,17,45000,0,5,1987,언주로 3,-,-,-,,,개포동
2,서울특별시 강남구 개포동,658-1,658.0,1.0,개포6차우성아파트1동~8동,전세,79.97,202001,18,44000,0,4,1987,언주로 3,-,-,-,,,개포동
3,서울특별시 강남구 개포동,652,652.0,0.0,개포우성3차,전세,133.46,202001,15,90000,0,11,1984,개포로 307,-,-,-,,,개포동
4,서울특별시 강남구 개포동,652,652.0,0.0,개포우성3차,전세,133.46,202001,18,85000,0,11,1984,개포로 307,-,-,-,,,개포동


In [10]:
# Narrow the frame to the columns used from here on; '보증금(만원)' is kept last.
train_df = train_df[
    [
        '시군구',
        '동',
        '단지명',
        '전월세구분',
        '전용면적(㎡)',
        '계약년월',
        '층',
        '건축년도',
        '보증금(만원)',
    ]
]

In [11]:
# Confirm the reduced column set.
train_df.head(n=5)

Unnamed: 0,시군구,동,단지명,전월세구분,전용면적(㎡),계약년월,층,건축년도,보증금(만원)
0,서울특별시 강남구 개포동,개포동,개포6차우성아파트1동~8동,전세,79.97,202001,1,1987,43000
1,서울특별시 강남구 개포동,개포동,개포6차우성아파트1동~8동,전세,79.97,202001,5,1987,45000
2,서울특별시 강남구 개포동,개포동,개포6차우성아파트1동~8동,전세,79.97,202001,4,1987,44000
3,서울특별시 강남구 개포동,개포동,개포우성3차,전세,133.46,202001,11,1984,90000
4,서울특별시 강남구 개포동,개포동,개포우성3차,전세,133.46,202001,11,1984,85000
