In [3]:
import pandas as pd
import numpy as np

# Visualization
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib import font_manager, rc
plt.rcParams['font.family'] = 'NanumGothic'
import platform
if platform.system() == 'Windows':
    font_name = font_manager.FontProperties(fname="c:/Windows/Fonts/malgun.ttf").get_name()
    rc('font', family=font_name)
else:    
    rc('font', family='AppleGothic')

matplotlib.rcParams['axes.unicode_minus'] = False

import os
path = os.path.dirname(os.getcwd())
path = os.path.dirname(path)
path = os.path.join(path, "Daesamanlap/ProcessedData")
train = pd.read_csv(os.path.join(path,"merged_train.csv"))
test = pd.read_csv(os.path.join(path,"merged_test.csv"))

import warnings
warnings.filterwarnings(action='ignore')

train=train.drop_duplicates(keep='first')
test=test.drop_duplicates(keep='first')

In [4]:
## 1. EDA 및 전처리(baseline: MSE 98)

### 1) 상가비율

def _add_store_ratio(frame):
    """Return ``frame`` with a per-complex '상가비율' (shop ratio) column merged in.

    For every 단지코드 the rows are counted per 임대건물구분 (non-null 총세대수
    values), and the ratio is ``shops / (apartments + shops)``.  Complexes
    that have no '아파트' rows are absent from the lookup table, so they end
    up with NaN after the left merge; complexes without '상가' rows get 0.
    """
    # Count explicitly on 총세대수 instead of relying on it being the first
    # column of the grouped frame.
    per_type = (
        frame.groupby(['단지코드', '임대건물구분'])['총세대수']
        .count()
        .rename('n')
        .reset_index()
    )
    shops = (
        per_type.loc[per_type['임대건물구분'] == '상가', ['단지코드', 'n']]
        .rename(columns={'n': '상가수'})
    )
    flats = (
        per_type.loc[per_type['임대건물구분'] == '아파트', ['단지코드', 'n']]
        .rename(columns={'n': '아파트수'})
    )
    ratio = pd.merge(flats, shops, on='단지코드', how='left').fillna(0)
    ratio['상가비율'] = ratio['상가수'] / (ratio['아파트수'] + ratio['상가수'])
    return pd.merge(frame, ratio[['단지코드', '상가비율']], on='단지코드', how='left')


train = _add_store_ratio(train)
test = _add_store_ratio(test)

### 2) 세대당가능주차면수

# Per-household ratios, computed column-wise instead of with a row-by-row
# apply(axis=1): same values, without building a Series for every row.
for frame in (train, test):
    # Parking slots inside the complex per household.
    frame['세대당_가능주차면수'] = frame['단지내주차면수'] / frame['총세대수']
    # Vacant units per household.
    frame['공가비율'] = frame['공가수'] / frame['총세대수']

### 4) 대형전용면적

# Threshold on 전용면적 above which a unit is flagged as "large".
level = 85

# 1 when the unit's 전용면적 is strictly greater than the threshold, else 0
# (a missing area compares as False and therefore yields 0).
train['대형전용면적'] = (train['전용면적'] > level).astype('int64')
test['대형전용면적'] = (test['전용면적'] > level).astype('int64')

### 5) 공급유형
#- 공공임대 5년/10년 $\rightarrow$ 공공임대(단기)

# Fold the 5-year and 10-year public rentals into one short-term category.
short_term_types = ['공공임대(5년)', '공공임대(10년)']
for frame in (train, test):
    is_short_term = frame['공급유형'].isin(short_term_types)
    frame.loc[is_short_term, '공급유형'] = '공공임대(단기)'

# Collapse to one dimension: drop the columns listed below (the ones that
# differ between rows of the same complex) and de-duplicate what is left.
not_unique = [
    '공급유형', '전용면적', '임대건물구분', '전용면적별세대수',
    '대형전용면적', '임대보증금', '임대료', '자격유형',
]

train_one = train.drop(columns=not_unique).drop_duplicates().reset_index(drop=True)
test_one = test.drop(columns=not_unique).drop_duplicates().reset_index(drop=True)

#- 전용면적별세대수 채우기

# One column per supply type seen in train, initialised to 0 in both frames so
# that train_one and test_one share the same set of supply-type columns.
for supply_type in train['공급유형'].unique():
    train_one[supply_type] = 0
    test_one[supply_type] = 0


def _fill_supply_type_counts(one_row_frame, raw_frame):
    """Write per-complex household totals into the supply-type columns.

    ``raw_frame`` is grouped by (단지코드, 공급유형) and 전용면적별세대수 is
    summed; each total is stored in ``one_row_frame`` at the rows of that
    complex, in the column named after the supply type.  ``one_row_frame`` is
    modified in place.
    """
    # Select the column before aggregating so that only 전용면적별세대수 is
    # summed rather than every column of the frame.
    totals = raw_frame.groupby(['단지코드', '공급유형'])['전용면적별세대수'].sum()
    for (code, supply_type), households in totals.items():
        one_row_frame.loc[one_row_frame['단지코드'] == code, supply_type] = households


_fill_supply_type_counts(train_one, train)
_fill_supply_type_counts(test_one, test)

### 6) 임대료 임대보증금 : 평균

# Per-complex mean of rent and deposit.  The two columns are selected BEFORE
# calling mean(): averaging the whole grouped frame also tries to average the
# text columns, which raises TypeError on pandas >= 2.0.
sample = train.groupby('단지코드')[['임대료', '임대보증금']].mean().reset_index()
train_one = pd.merge(train_one, sample, on="단지코드", how="left")

sample = test.groupby('단지코드')[['임대료', '임대보증금']].mean().reset_index()
test_one = pd.merge(test_one, sample, on="단지코드", how="left")

### 연면적 / 전용면적은 나중에 drop

from sklearn.impute import KNNImputer
imputer = KNNImputer(n_neighbors=2)  # author's note: of 1, 2, 3, 4, 5 neighbours, 2 worked best

# Per-complex mean 전용면적.  The column is selected before mean() so that the
# text columns are never averaged (TypeError on pandas >= 2.0).
sample = train.groupby('단지코드')[['전용면적']].mean().reset_index()
train_one = pd.merge(train_one, sample, on="단지코드", how="left")

sample = test.groupby('단지코드')[['전용면적']].mean().reset_index()
test_one = pd.merge(test_one, sample, on="단지코드", how="left")

# Work on a copy of 연면적 so the original column stays untouched.
train_one["연면적new"] = train_one["연면적"]
test_one["연면적new"] = test_one["연면적"]

# A total floor area smaller than the mean unit area is treated as invalid and
# blanked out so that it gets imputed below.
train_one.loc[train_one["연면적new"] < train_one["전용면적"], "연면적new"] = np.nan
test_one.loc[test_one["연면적new"] < test_one["전용면적"], "연면적new"] = np.nan

# Leftover checks from the original cell: `train_one.isna().sum()` was annotated
# "63" and `test_one.isna().sum()` "23" -- presumably the NaN counts at this
# point; not verified here.

impute_cols = ["단지내주차면수", "총세대수", "전용면적", "연면적new"]

# The imputed frame reuses the source index so the assignment back into
# train_one / test_one aligns row for row.
im = pd.DataFrame(imputer.fit_transform(train_one[impute_cols]),
                  columns=impute_cols, index=train_one.index)
train_one["연면적new"] = im["연면적new"]

# NOTE(review): the imputer is fitted again on the test rows, so test values
# are filled from test neighbours rather than from the train fit.  Kept as in
# the original; confirm whether `imputer.transform` was intended.
imp = pd.DataFrame(imputer.fit_transform(test_one[impute_cols]),
                   columns=impute_cols, index=test_one.index)
test_one["연면적new"] = imp["연면적new"]

### 7) 대형전용면적 총개수

# Per-complex sum of the 0/1 large-unit flag, attached to the one-row frames.
large_units_train = train.groupby('단지코드')['대형전용면적'].sum().reset_index()
train_one = train_one.merge(large_units_train, on="단지코드", how="left")

large_units_test = test.groupby('단지코드')['대형전용면적'].sum().reset_index()
test_one = test_one.merge(large_units_test, on="단지코드", how="left")

### 8) 변수제거

# Columns removed from both one-row frames before modelling.
unused_columns = ['단지명', '도로명주소', 'subway_name', '임대상가', '전용면적']

train_one = train_one.drop(columns=unused_columns)
test_one = test_one.drop(columns=unused_columns)

In [5]:
# Split both frames on the number of parking slots inside the complex:
# "up" holds complexes with at least PARKING_SPLIT slots, "down" the rest.
# Both comparisons are written out explicitly, so a row with a missing slot
# count lands in neither group.
PARKING_SPLIT = 987

train_slots = train_one['단지내주차면수']
test_slots = test_one['단지내주차면수']

train_up = train_one.loc[train_slots >= PARKING_SPLIT].reset_index(drop=True)
train_down = train_one.loc[train_slots < PARKING_SPLIT].reset_index(drop=True)
test_up = test_one.loc[test_slots >= PARKING_SPLIT].reset_index(drop=True)
test_down = test_one.loc[test_slots < PARKING_SPLIT].reset_index(drop=True)

In [6]:
def areafunc(x):
    """Map a region name to its group code (0, 1, 2, or 3 for anything else)."""
    region_groups = (
        (0, ('제주특별자치도', '경상남도', '부산광역시')),
        (1, ('광주광역시', '세종특별자치시', '경상북도', '전라북도', '서울특별시')),
        (2, ('대구광역시',)),
    )
    for group_code, members in region_groups:
        if x in members:
            return group_code
    # Every region not listed above shares the last group.
    return 3
    

In [7]:
def _encode_area(frame):
    """Replace '지역' with its group code and append area_0..area_2 dummies.

    ``frame['지역']`` is overwritten in place with the code from ``areafunc``;
    the returned frame additionally carries one indicator column per code
    0, 1 and 2 (code 3 is the reference level and gets no column).
    """
    frame['지역'] = frame['지역'].apply(areafunc)
    # Fix the category set so all four indicator columns always exist, even
    # when a frame happens to contain no row of some group.  Dropping "the
    # last column" by position would otherwise remove the wrong indicator and
    # leave train and test with different columns.
    area_dtype = pd.CategoricalDtype(categories=[0, 1, 2, 3])
    dummies = pd.get_dummies(frame['지역'].astype(area_dtype), prefix='area')
    return frame.join(dummies.drop(columns='area_3'))


train_up = _encode_area(train_up)
test_up = _encode_area(test_up)

## 모델링

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, KFold, RepeatedKFold
from sklearn.metrics import mean_absolute_error
from catboost import CatBoostRegressor

In [9]:
def fit(X_train, X_test, y_train, y_test, model):
    """Fit ``model`` on the training split and score it on the held-out split.

    Parameters
    ----------
    X_train, y_train : training features and target.
    X_test, y_test : held-out features and target used for scoring.
    model : estimator exposing ``fit`` and ``predict``; it is fitted in place.

    Returns
    -------
    (model, tst_pred, error) : the fitted estimator, its predictions for
    ``X_test`` and the mean absolute error of those predictions.
    """
    try:
        model.fit(X_train, y_train, verbose=False)
    except TypeError:
        # fit() of this estimator does not take a `verbose` keyword; retry
        # without it.  Only TypeError is caught so that real training
        # failures (and KeyboardInterrupt) are no longer swallowed.
        model.fit(X_train, y_train)
    tst_pred = model.predict(X_test)
    error = mean_absolute_error(y_test, tst_pred)
    return model, tst_pred, error


def fit_models(X, y, models, cv=False):
    """Pick the candidate with the lowest MAE and refit it on all rows.

    Parameters
    ----------
    X : pandas.DataFrame of features.
    y : pandas.Series with the target.
    models : list of estimators; each one is fitted in place.
    cv : if falsy, score on a single 70/30 shuffled hold-out split; otherwise
        score with the mean MAE of a shuffled 5-fold split.  Both splits use
        random_state=2021.

    Returns
    -------
    (best_model, best_tst_pred, best_error)
        best_model : the winning estimator, refitted on every row of X / y.
        best_tst_pred : dict mapping row position in X to the out-of-sample
            prediction for that row (hold-out rows only when cv is falsy).
        best_error : the winning MAE.

    Raises
    ------
    ValueError : if ``models`` is empty.
    RuntimeError : if no candidate produced a comparable (non-NaN) error.
    """
    X = np.array(X.reset_index(drop=True))
    y = np.array(y.reset_index(drop=True))
    models = list(models)
    if not models:
        raise ValueError("fit_models needs at least one candidate model")

    # Start from +inf rather than an arbitrary cap, so a candidate is selected
    # even when every error is large.
    best_error = np.inf
    best_model = None
    best_tst_pred = {}

    if not cv:
        # Split row positions instead of the arrays themselves so that the
        # hold-out predictions can be keyed by row, exactly like the CV branch.
        # (This branch used to fail because the row indices were never built.)
        row_ids = np.arange(len(X))
        train_index, test_index = train_test_split(
            row_ids, test_size=0.3, shuffle=True, random_state=2021)
        for m in models:
            model, tst_pred, error = fit(X[train_index], X[test_index],
                                         y[train_index], y[test_index], m)
            if error < best_error:
                best_error = error
                best_model = model
                best_tst_pred = dict(zip(test_index.tolist(),
                                         np.asarray(tst_pred).tolist()))
    else:
        kf = KFold(n_splits=5, shuffle=True, random_state=2021)
        for m in models:
            fold_errors = []
            out_of_fold = {}
            for train_index, test_index in kf.split(X, y):
                model, tst_pred, error = fit(X[train_index], X[test_index],
                                             y[train_index], y[test_index], m)
                fold_errors.append(error)
                out_of_fold.update(zip(test_index.tolist(),
                                       np.asarray(tst_pred).tolist()))
            cv_error = np.mean(fold_errors)
            if cv_error < best_error:
                best_error = cv_error
                best_model = model
                best_tst_pred = out_of_fold

    if best_model is None:
        raise RuntimeError("no candidate model produced a comparable error")

    # Final refit on every row.  X / y are used directly instead of stitching
    # the last split back together, so the rows are passed in their original
    # order.
    try:
        best_model = best_model.fit(X, y, verbose=False)
    except TypeError:
        # fit() of this estimator has no `verbose` keyword.
        best_model = best_model.fit(X, y)
    return best_model, best_tst_pred, best_error

### 2) train/test up

In [18]:
# Baseline feature columns for the "up" group (order matters: the frame is
# converted to a positional array before being handed to the model).
base_features = [
    '총세대수', '공가수', '지하철역', '버스정류장',
    '단지내주차면수', '연면적new', '위도', '경도',
    'subway_dist', '환승역 수', '총인구수', '세대당_인구',
    '남/여비율', '남/여_0~19세', '남/여_20~39세', '남/여_40~69세',
    '남/여_70세이상', '0~19세_비율', '20~39세_비율', '40~69세_비율',
    '70세이상_비율', '세대당_가능주차면수', '공가비율',
    '국민임대', '영구임대', '공공임대(단기)', '장기전세',
    '행복주택', '공공임대(분납)', '공공분양', '공공임대(50년)',
    '임대료', '임대보증금', '대형전용면적',
    'area_0', 'area_1', 'area_2',
]

# Select the columns directly (the earlier drop() was made redundant by this
# explicit selection) and take a copy: later cells add engineered columns to
# X, which must not be done on a slice of train_up.
X = train_up[base_features].copy()
y = train_up['등록차량수']

In [19]:
# Single candidate: CatBoost with an MAE objective and a fixed seed.
models = [CatBoostRegressor(loss_function='MAE', random_state=2021)]

# cv=True selects the K-fold branch of fit_models; X is the baseline matrix.
model_up, pred_up, error_up = fit_models(X, y, models, cv=True)
print('MAE : ', error_up)

MAE :  291.87954235273975


- 공가수/단지내주차면수

- 공가수/등록차량수

- 공가수/위도

- 공가수/경도

- 공가수/subway_dist

- 세대당인구/subway_dist

- 공가수/총인구수

- 공가수/세대당인구

- 공가수/남여비율

- 공가수/남-여20-39세



In [73]:
# Interaction features: selected columns multiplied by the vacancy count.
vacancies = X["공가수"]
product_sources = [
    ("단지", "단지내주차면수"),
    ("남여2039", "남/여_20~39세"),
    ("남여비율", "남/여비율"),
    ("총인구수", "총인구수"),
    ("세대당인구", "세대당_인구"),
    ("subway_dist", "subway_dist"),
]
for label, source in product_sources:
    X[f"{label}*공가"] = X[source] * vacancies

# The one product that does not involve the vacancy count.
X["세대당인구*subway_dist"] = X["subway_dist"] * X["세대당_인구"]

In [74]:
# Ratio features: selected columns divided by the vacancy count.  The column
# name always reads numerator/denominator.
# NOTE(review): rows whose denominator is 0 produce inf/NaN here -- confirm
# the model is expected to receive such values.
vacancies = X["공가수"]
ratio_sources = [
    ("단지", "단지내주차면수"),
    ("남여2039", "남/여_20~39세"),
    ("남여비율", "남/여비율"),
    ("총인구수", "총인구수"),
    ("세대당인구", "세대당_인구"),
    ("subway_dist", "subway_dist"),
]
for label, source in ratio_sources:
    X[f"{label}/공가"] = X[source] / vacancies

# People per household divided by the subway distance.  The operands used to
# be swapped (subway_dist / 세대당_인구), contradicting the column name and the
# numerator/denominator convention of every other ratio above.
X["세대당인구/subway_dist"] = X["세대당_인구"] / X["subway_dist"]

In [75]:
# Experiment: all baseline columns plus the parking-slots * vacancies product.
feature_cols = [
    '총세대수', '공가수', '지하철역', '버스정류장',
    '단지내주차면수', '연면적new', '위도', '경도',
    'subway_dist', '환승역 수', '총인구수', '세대당_인구',
    '남/여비율', '남/여_0~19세', '남/여_20~39세', '남/여_40~69세',
    '남/여_70세이상', '0~19세_비율', '20~39세_비율', '40~69세_비율',
    '70세이상_비율', '세대당_가능주차면수', '공가비율',
    '국민임대', '영구임대', '공공임대(단기)', '장기전세',
    '행복주택', '공공임대(분납)', '공공분양', '공공임대(50년)',
    '임대료', '임대보증금', '대형전용면적',
    'area_0', 'area_1', 'area_2',
    '단지*공가',
]
X0 = X.loc[:, feature_cols]
y = train_up['등록차량수']

In [76]:
# Single candidate: CatBoost with an MAE objective and a fixed seed.
models = [CatBoostRegressor(loss_function='MAE', random_state=2021)]

# cv=True selects the K-fold branch of fit_models; X0 is the current subset.
model_up, pred_up, error_up = fit_models(X0, y, models, cv=True)
print('MAE : ', error_up)

MAE :  297.3752094129575


In [79]:
# Experiment: all baseline columns plus both parking-slots/vacancies features
# (ratio first, then product).
feature_cols = [
    '총세대수', '공가수', '지하철역', '버스정류장',
    '단지내주차면수', '연면적new', '위도', '경도',
    'subway_dist', '환승역 수', '총인구수', '세대당_인구',
    '남/여비율', '남/여_0~19세', '남/여_20~39세', '남/여_40~69세',
    '남/여_70세이상', '0~19세_비율', '20~39세_비율', '40~69세_비율',
    '70세이상_비율', '세대당_가능주차면수', '공가비율',
    '국민임대', '영구임대', '공공임대(단기)', '장기전세',
    '행복주택', '공공임대(분납)', '공공분양', '공공임대(50년)',
    '임대료', '임대보증금', '대형전용면적',
    'area_0', 'area_1', 'area_2',
    '단지/공가', '단지*공가',
]
X0 = X.loc[:, feature_cols]
y = train_up['등록차량수']

In [80]:
# Single candidate: CatBoost with an MAE objective and a fixed seed.
models = [CatBoostRegressor(loss_function='MAE', random_state=2021)]

# cv=True selects the K-fold branch of fit_models; X0 is the current subset.
model_up, pred_up, error_up = fit_models(X0, y, models, cv=True)
print('MAE : ', error_up)

MAE :  285.1889871220469


In [83]:
# Experiment: baseline columns without '지하철역', plus the
# population * vacancies product.
feature_cols = [
    '총세대수', '공가수', '버스정류장',
    '단지내주차면수', '연면적new', '위도', '경도',
    'subway_dist', '환승역 수', '총인구수', '세대당_인구',
    '남/여비율', '남/여_0~19세', '남/여_20~39세', '남/여_40~69세',
    '남/여_70세이상', '0~19세_비율', '20~39세_비율', '40~69세_비율',
    '70세이상_비율', '세대당_가능주차면수', '공가비율',
    '국민임대', '영구임대', '공공임대(단기)', '장기전세',
    '행복주택', '공공임대(분납)', '공공분양', '공공임대(50년)',
    '임대료', '임대보증금', '대형전용면적',
    'area_0', 'area_1', 'area_2',
    '총인구수*공가',
]
X0 = X.loc[:, feature_cols]
y = train_up['등록차량수']

In [84]:
# Single candidate: CatBoost with an MAE objective and a fixed seed.
models = [CatBoostRegressor(loss_function='MAE', random_state=2021)]

# cv=True selects the K-fold branch of fit_models; X0 is the current subset.
model_up, pred_up, error_up = fit_models(X0, y, models, cv=True)
print('MAE : ', error_up)

MAE :  281.6286884162005


In [87]:
# Experiment: baseline columns without '지하철역', plus the
# people-per-household * subway-distance product.
feature_cols = [
    '총세대수', '공가수', '버스정류장',
    '단지내주차면수', '연면적new', '위도', '경도',
    'subway_dist', '환승역 수', '총인구수', '세대당_인구',
    '남/여비율', '남/여_0~19세', '남/여_20~39세', '남/여_40~69세',
    '남/여_70세이상', '0~19세_비율', '20~39세_비율', '40~69세_비율',
    '70세이상_비율', '세대당_가능주차면수', '공가비율',
    '국민임대', '영구임대', '공공임대(단기)', '장기전세',
    '행복주택', '공공임대(분납)', '공공분양', '공공임대(50년)',
    '임대료', '임대보증금', '대형전용면적',
    'area_0', 'area_1', 'area_2',
    '세대당인구*subway_dist',
]
X0 = X.loc[:, feature_cols]
y = train_up['등록차량수']

In [88]:
# Single candidate: CatBoost with an MAE objective and a fixed seed.
models = [CatBoostRegressor(loss_function='MAE', random_state=2021)]

# cv=True selects the K-fold branch of fit_models; X0 is the current subset.
model_up, pred_up, error_up = fit_models(X0, y, models, cv=True)
print('MAE : ', error_up)

MAE :  290.7144466797361


In [66]:
# Experiment: baseline columns without '지하철역' and '남/여_20~39세', plus
# the parking-slots/vacancies and 20~39 sex-ratio/vacancies ratios.
feature_cols = [
    '총세대수', '공가수', '버스정류장',
    '단지내주차면수', '연면적new', '위도', '경도',
    'subway_dist', '환승역 수', '총인구수', '세대당_인구',
    '남/여비율', '남/여_0~19세', '남/여_40~69세',
    '남/여_70세이상', '0~19세_비율', '20~39세_비율', '40~69세_비율',
    '70세이상_비율', '세대당_가능주차면수', '공가비율',
    '국민임대', '영구임대', '공공임대(단기)', '장기전세',
    '행복주택', '공공임대(분납)', '공공분양', '공공임대(50년)',
    '임대료', '임대보증금', '대형전용면적',
    'area_0', 'area_1', 'area_2',
    '단지/공가', '남여2039/공가',
]
X0 = X.loc[:, feature_cols]
y = train_up['등록차량수']

In [67]:
# Single candidate: CatBoost with an MAE objective and a fixed seed.
models = [CatBoostRegressor(loss_function='MAE', random_state=2021)]

# cv=True selects the K-fold branch of fit_models; X0 is the current subset.
model_up, pred_up, error_up = fit_models(X0, y, models, cv=True)
print('MAE : ', error_up)

MAE :  297.49040122191604


In [36]:
# Experiment: all baseline columns plus the 20~39 sex-ratio/vacancies ratio.
feature_cols = [
    '총세대수', '공가수', '지하철역', '버스정류장',
    '단지내주차면수', '연면적new', '위도', '경도',
    'subway_dist', '환승역 수', '총인구수', '세대당_인구',
    '남/여비율', '남/여_0~19세', '남/여_20~39세', '남/여_40~69세',
    '남/여_70세이상', '0~19세_비율', '20~39세_비율', '40~69세_비율',
    '70세이상_비율', '세대당_가능주차면수', '공가비율',
    '국민임대', '영구임대', '공공임대(단기)', '장기전세',
    '행복주택', '공공임대(분납)', '공공분양', '공공임대(50년)',
    '임대료', '임대보증금', '대형전용면적',
    'area_0', 'area_1', 'area_2',
    '남여2039/공가',
]
X0 = X.loc[:, feature_cols]
y = train_up['등록차량수']

In [37]:
# Single candidate: CatBoost with an MAE objective and a fixed seed.
models = [CatBoostRegressor(loss_function='MAE', random_state=2021)]

# cv=True selects the K-fold branch of fit_models; X0 is the current subset.
model_up, pred_up, error_up = fit_models(X0, y, models, cv=True)
print('MAE : ', error_up)

MAE :  288.66167335390753


In [38]:
# Experiment: all baseline columns plus the overall sex-ratio/vacancies ratio.
feature_cols = [
    '총세대수', '공가수', '지하철역', '버스정류장',
    '단지내주차면수', '연면적new', '위도', '경도',
    'subway_dist', '환승역 수', '총인구수', '세대당_인구',
    '남/여비율', '남/여_0~19세', '남/여_20~39세', '남/여_40~69세',
    '남/여_70세이상', '0~19세_비율', '20~39세_비율', '40~69세_비율',
    '70세이상_비율', '세대당_가능주차면수', '공가비율',
    '국민임대', '영구임대', '공공임대(단기)', '장기전세',
    '행복주택', '공공임대(분납)', '공공분양', '공공임대(50년)',
    '임대료', '임대보증금', '대형전용면적',
    'area_0', 'area_1', 'area_2',
    '남여비율/공가',
]
X0 = X.loc[:, feature_cols]
y = train_up['등록차량수']

In [39]:
# Single candidate: CatBoost with an MAE objective and a fixed seed.
models = [CatBoostRegressor(loss_function='MAE', random_state=2021)]

# cv=True selects the K-fold branch of fit_models; X0 is the current subset.
model_up, pred_up, error_up = fit_models(X0, y, models, cv=True)
print('MAE : ', error_up)

MAE :  291.15518413774737


In [54]:
# Experiment: all baseline columns plus the population/vacancies ratio.
feature_cols = [
    '총세대수', '공가수', '지하철역', '버스정류장',
    '단지내주차면수', '연면적new', '위도', '경도',
    'subway_dist', '환승역 수', '총인구수', '세대당_인구',
    '남/여비율', '남/여_0~19세', '남/여_20~39세', '남/여_40~69세',
    '남/여_70세이상', '0~19세_비율', '20~39세_비율', '40~69세_비율',
    '70세이상_비율', '세대당_가능주차면수', '공가비율',
    '국민임대', '영구임대', '공공임대(단기)', '장기전세',
    '행복주택', '공공임대(분납)', '공공분양', '공공임대(50년)',
    '임대료', '임대보증금', '대형전용면적',
    'area_0', 'area_1', 'area_2',
    '총인구수/공가',
]
X0 = X.loc[:, feature_cols]
y = train_up['등록차량수']

In [55]:
# Single candidate: CatBoost with an MAE objective and a fixed seed.
models = [CatBoostRegressor(loss_function='MAE', random_state=2021)]

# cv=True selects the K-fold branch of fit_models; X0 is the current subset.
model_up, pred_up, error_up = fit_models(X0, y, models, cv=True)
print('MAE : ', error_up)


MAE :  281.2557053113245


In [None]:
# All baseline columns plus the population/vacancies ratio.  This cell has no
# execution count in the export, and the next cell overwrites X0 before any
# model is evaluated on it.
feature_cols = [
    '총세대수', '공가수', '지하철역', '버스정류장',
    '단지내주차면수', '연면적new', '위도', '경도',
    'subway_dist', '환승역 수', '총인구수', '세대당_인구',
    '남/여비율', '남/여_0~19세', '남/여_20~39세', '남/여_40~69세',
    '남/여_70세이상', '0~19세_비율', '20~39세_비율', '40~69세_비율',
    '70세이상_비율', '세대당_가능주차면수', '공가비율',
    '국민임대', '영구임대', '공공임대(단기)', '장기전세',
    '행복주택', '공공임대(분납)', '공공분양', '공공임대(50년)',
    '임대료', '임대보증금', '대형전용면적',
    'area_0', 'area_1', 'area_2',
    '총인구수/공가',
]
X0 = X.loc[:, feature_cols]
y = train_up['등록차량수']

In [50]:
# Experiment: all baseline columns plus the people-per-household/vacancies
# ratio.
feature_cols = [
    '총세대수', '공가수', '지하철역', '버스정류장',
    '단지내주차면수', '연면적new', '위도', '경도',
    'subway_dist', '환승역 수', '총인구수', '세대당_인구',
    '남/여비율', '남/여_0~19세', '남/여_20~39세', '남/여_40~69세',
    '남/여_70세이상', '0~19세_비율', '20~39세_비율', '40~69세_비율',
    '70세이상_비율', '세대당_가능주차면수', '공가비율',
    '국민임대', '영구임대', '공공임대(단기)', '장기전세',
    '행복주택', '공공임대(분납)', '공공분양', '공공임대(50년)',
    '임대료', '임대보증금', '대형전용면적',
    'area_0', 'area_1', 'area_2',
    '세대당인구/공가',
]
X0 = X.loc[:, feature_cols]
y = train_up['등록차량수']

In [51]:
# Single candidate: CatBoost with an MAE objective and a fixed seed.
models = [CatBoostRegressor(loss_function='MAE', random_state=2021)]

# cv=True selects the K-fold branch of fit_models; X0 is the current subset.
model_up, pred_up, error_up = fit_models(X0, y, models, cv=True)
print('MAE : ', error_up)


MAE :  298.636074571094


In [60]:
# Experiment: baseline columns without '지하철역' and 'subway_dist', plus the
# subway-distance/vacancies ratio.
feature_cols = [
    '총세대수', '공가수', '버스정류장',
    '단지내주차면수', '연면적new', '위도', '경도',
    '환승역 수', '총인구수', '세대당_인구',
    '남/여비율', '남/여_0~19세', '남/여_20~39세', '남/여_40~69세',
    '남/여_70세이상', '0~19세_비율', '20~39세_비율', '40~69세_비율',
    '70세이상_비율', '세대당_가능주차면수', '공가비율',
    '국민임대', '영구임대', '공공임대(단기)', '장기전세',
    '행복주택', '공공임대(분납)', '공공분양', '공공임대(50년)',
    '임대료', '임대보증금', '대형전용면적',
    'area_0', 'area_1', 'area_2',
    'subway_dist/공가',
]
X0 = X.loc[:, feature_cols]
y = train_up['등록차량수']

In [61]:
# Single candidate: CatBoost with an MAE objective and a fixed seed.
models = [CatBoostRegressor(loss_function='MAE', random_state=2021)]

# cv=True selects the K-fold branch of fit_models; X0 is the current subset.
model_up, pred_up, error_up = fit_models(X0, y, models, cv=True)
print('MAE : ', error_up)


MAE :  279.4783669509222


In [71]:
# Experiment: baseline columns without '지하철역' and 'subway_dist', plus the
# '세대당인구/subway_dist' ratio column.
feature_cols = [
    '총세대수', '공가수', '버스정류장',
    '단지내주차면수', '연면적new', '위도', '경도',
    '환승역 수', '총인구수', '세대당_인구',
    '남/여비율', '남/여_0~19세', '남/여_20~39세', '남/여_40~69세',
    '남/여_70세이상', '0~19세_비율', '20~39세_비율', '40~69세_비율',
    '70세이상_비율', '세대당_가능주차면수', '공가비율',
    '국민임대', '영구임대', '공공임대(단기)', '장기전세',
    '행복주택', '공공임대(분납)', '공공분양', '공공임대(50년)',
    '임대료', '임대보증금', '대형전용면적',
    'area_0', 'area_1', 'area_2',
    '세대당인구/subway_dist',
]
X0 = X.loc[:, feature_cols]
y = train_up['등록차량수']

In [72]:
# Single candidate: CatBoost with an MAE objective and a fixed seed.
models = [CatBoostRegressor(loss_function='MAE', random_state=2021)]

# cv=True selects the K-fold branch of fit_models; X0 is the current subset.
model_up, pred_up, error_up = fit_models(X0, y, models, cv=True)
print('MAE : ', error_up)


MAE :  306.4730282957438


In [40]:
# Evaluate on X itself rather than an X0 subset.  In top-to-bottom order X
# also carries the engineered product/ratio columns added in earlier cells.
catboost_candidates = [CatBoostRegressor(loss_function='MAE', random_state=2021)]
models = catboost_candidates
model_up, pred_up, error_up = fit_models(X, y, models, cv=True)
print('MAE : ', error_up)

MAE :  291.87954235273975
