### 개발 환경(Colab)
- 운영체제 : Ubuntu 18.04.6 LTS
- 언어 : Python 3.8.16
- 라이브러리 : 
- torch 1.13.0+cu116
- pandas 1.3.5
- numpy 1.21.6
- xgboost 0.90
- skleran 1.0.2
- matplotlib 3.2.2

## Import

In [None]:
# 한글폰트 다운로드
# 다운 후 런타임 다시 시작
!sudo apt-get install -y fonts-nanum
!sudo fc-cache -fv
!rm ~/.cache/matplotlib -rf

In [None]:
import random
import pandas as pd
import numpy as np
import os
import glob
import time

from tqdm import tqdm,auto

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import warnings
warnings.filterwarnings(action='ignore') 

import xgboost

from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import cross_validate,train_test_split

import matplotlib.pyplot as plt
import seaborn as sns
import pickle

# 한글 폰트 적용
plt.rc('font', family='NanumBarunGothic')
np.set_printoptions(formatter={'float_kind': lambda x: "{0:0.6f}".format(x)})

In [None]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# 구글 드라이브 마운트
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/데이콘/상추

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/데이콘/상추


## Data Pre-processing

In [None]:
# 전체 데이터 경로로
all_input_list = sorted(glob.glob('./data/train_input/*.csv'))
all_target_list = sorted(glob.glob('./data/train_target/*.csv'))

test_input_list = sorted(glob.glob('./data/test_input/*.csv'))
test_target_list = sorted(glob.glob('./data/test_target/*.csv'))

In [None]:
# 데이터 전처리
def merge_data(input_list,target_list,col_list,drop_feature_list):
  data_list = []
  label_list = []

  for input_path, target_path in tqdm(zip(input_list,target_list)):
    input_df = pd.read_csv(input_path)
    target_df = pd.read_csv(target_path)

    # 날짜 컬럼 전처리
    if 'obs_time' in input_df.columns:
      input_df['obs_time'] = input_df['obs_time'].map(lambda x: int(x[:2]))

    # 시간별 파생변수 생성(온도,습도,이산화탄소 반영)
    input_df['습도온도'] = input_df['내부습도관측치'] * input_df['내부온도관측치']
    input_df['습도이산화탄소'] = input_df['내부습도관측치'] * input_df['co2관측치'] 
    input_df['온도이산화탄소'] = input_df['내부온도관측치'] * input_df['co2관측치']
    input_df['습도온도이산화탄소'] = input_df['내부습도관측치'] * input_df['내부온도관측치'] * input_df['co2관측치']

    input_df['광량여부'] = (input_df['시간당백색광량'] + input_df['시간당적색광량'] + input_df['시간당청색광량']).map(lambda x: 0 if x == 0 else 1)

    # 변수 선택
    if len(drop_feature_list) > 0:
      input_df = input_df.drop(columns=drop_feature_list)
      input_df = input_df.fillna(0)
    else:
      input_df = input_df.fillna(0)
        
    input_length = int(len(input_df)/24)
    target_length = int(len(target_df))

    for idx in range(target_length):
        # 시간별 데이터 하루 단위로 자르기
        day_data = input_df[24*idx:24*(idx+1)]

        # 하루 단위 데이터 flatten
        time_series = day_data.values.flatten().tolist() + [idx]
        data_list.append(time_series)

    for label in target_df["predicted_weight_g"]:
        label_list.append(label)
  
  # 컬럼명 리스트
  feature_list = [x for x in col_list if x not in drop_feature_list]
  total_col = []
  for hour in range(24):
    for col in feature_list:
      total_col.append(f'{hour}시_{col}')

  total_col.append('day')

  input = pd.DataFrame(data_list,columns=total_col)
  target = pd.DataFrame(label_list)

  # 이상값 수정
  input.loc[input['15시_내부온도관측치'] == 0, ['15시_내부온도관측치','15시_내부습도관측치']] = 27.46201254701998,51.560314940325554
  input.loc[input['1시_시간당분무량'] < 0,'1시_시간당분무량'] = 0
  input.loc[input['15시_시간당백색광량'] < 0 ,['15시_시간당백색광량','15시_시간당적색광량','15시_시간당청색광량']] = 18255.1900 , 1813.6608 , 3565.3540 
  return input, target

# target을 성장치로 수정(0일차의 target은 0일차의 상추 중량)
def modified_target(target_df):
  zero_start = []
  for i in range(0,len(target_df)):
    if i%28 == 0:
      zero_start.append(target_df[0][i])
      
    else:
      num = target_df[0][i] - target_df[0][i-1]
      zero_start.append(num)

  target_df[0] = zero_start
  return target_df

In [None]:
# 총 변수 리스트
col_list = ['DAT','obs_time','내부온도관측치', '내부습도관측치', 'co2관측치','ec관측치','시간당분무량',
       '일간누적분무량', '시간당백색광량', '일간누적백색광량', '시간당적색광량', '일간누적적색광량', '시간당청색광량',
       '일간누적청색광량', '시간당총광량', '일간누적총광량','습도온도','습도이산화탄소','온도이산화탄소','습도온도이산화탄소','광량여부']

# 제거할 변수 선택
drop_feature_list = ['DAT','obs_time','일간누적분무량','일간누적백색광량', '일간누적적색광량' ,'일간누적청색광량', '일간누적총광량','시간당총광량']

# 사용할 변수 리스트
feature_list = [x for x in col_list if x not in drop_feature_list]

# 데이터 합치고 파생변수 생성
train_X, train_y = merge_data(all_input_list,all_target_list,col_list,drop_feature_list)
test_X, test_y = merge_data(test_input_list,test_target_list,col_list,drop_feature_list)

predict_feature_list = train_X.columns
print(feature_list)
print(train_X.shape,len(train_y))
print(test_X.shape,len(test_y))

28it [00:01, 20.45it/s]
5it [00:00, 30.58it/s]


['내부온도관측치', '내부습도관측치', 'co2관측치', 'ec관측치', '시간당분무량', '시간당백색광량', '시간당적색광량', '시간당청색광량', '습도온도', '습도이산화탄소', '온도이산화탄소', '습도온도이산화탄소', '광량여부']
(784, 313) 784
(140, 313) 140


In [None]:
# 데이터 스케일링
p_scaler = StandardScaler()
p_scaler.fit(train_X)
train_scaled = p_scaler.transform(train_X)
test_scaled = p_scaler.transform(test_X)
train_y = modified_target(train_y)

In [None]:
train

In [1]:
# 모델 정의
def xgb_cv(train_X,train_y):
  start_time = time.time()
  xgb = xgboost.XGBRegressor(n_estimators=100,
                            tree_method='exact',
                            random_state=42)

  xgb_result = cross_validate(xgb, train_X, train_y,
                      scoring=['neg_mean_squared_error'],
                      return_train_score=True,
                      cv=5, n_jobs=-1)
  
  xgb_rmse = (-1 * xgb_result['test_neg_mean_squared_error'])**0.5

  xgb_score = round(np.mean(xgb_rmse),3)
  print("Time: {:.4f}sec".format((time.time() - start_time)))
  return xgb_score

In [None]:
xgb_result = xgb_cv(train_scaled,train_y)
print(xgb_result)

Time: 16.2898sec
3.05


In [None]:
# xgb 모델 저장
xgb = xgboost.XGBRegressor(n_estimators=100,
                            tree_method='exact',
                            random_state=42).fit(train_scaled,train_y)


pickle.dump(xgb,open('./xgboost_best','wb'))



In [None]:
# 저장한 xgb 모델 불러오기
xgb_model = pickle.load(open('./xgboost_best', 'rb'))



In [None]:
# 수정했던 타겟 값 원래대로 되돌리기
def revert_target(pred_list):
  result = []
  for i in range(len(pred_list)):
    if i%28 == 0:
      result.append(pred_list[i])
    else:
      num = result[-1] + pred_list[i]
      result.append(num)
  return result

# 일별 상추 중량 확인
def day_result(pred_list):
  result = pd.DataFrame(columns=list(range(28)))
  for idx in range(5):
    day_data = pd.Series(pred_list[28*idx:28*(idx+1)])
    result = result.append(pd.Series(day_data),ignore_index=True)
  return result

In [None]:
# 예측 
xgb_pred= xgb.predict(test_scaled)
xgb_pred = revert_target(xgb_pred)

# 케이스별 데이터 확인
dr = day_result(xgb_pred)
dr

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,18,19,20,21,22,23,24,25,26,27
0,0.415173,1.024601,1.847821,2.63706,3.919595,4.742815,5.566035,6.389256,7.335939,8.412372,...,41.080109,45.810837,53.620605,61.346138,69.462524,78.560516,84.826561,91.386734,96.882187,104.905067
1,0.266023,0.495571,0.731275,1.032407,1.255402,1.478398,1.77953,2.322369,2.824846,3.348904,...,20.780144,24.487343,30.642508,35.739086,39.639038,43.99789,48.216858,51.28392,55.202072,60.131596
2,0.381642,0.051774,-0.231623,-0.402198,-0.486533,-0.08365,-0.217281,0.091431,0.120639,0.119323,...,8.091178,10.211843,11.595337,12.688068,14.714088,16.936148,19.089163,22.336033,25.125488,27.40144
3,0.475957,1.345434,2.161441,2.962293,4.239672,5.055679,5.871686,6.687692,7.714614,8.708713,...,38.598755,43.651939,49.460266,56.022346,62.350372,71.006195,77.84259,83.885406,90.51178,98.195816
4,0.449314,0.786947,1.609664,1.896046,2.514316,2.923595,3.73591,4.117912,5.459744,7.007852,...,25.272442,28.773855,32.863197,39.312222,45.13599,50.21899,55.89196,62.950771,68.17202,73.583435


In [None]:
def make_result(result_df,test_target_list):
  for i in range(len(test_target_list)):
    submit_df = pd.read_csv(test_target_list[i])
    submit_df['predicted_weight_g'] = result_df[28*i:28*(i+1)]
    submit_df.to_csv(test_target_list[i],index=False)

make_result(xgb_pred,test_target_list)

In [None]:
import zipfile
os.chdir(".data/test_target/")
submission = zipfile.ZipFile("../../submission/sw_submission.zip", 'w')
for path in test_target_list:
    path = path.split('/')[-1]
    submission.write(path)

submission.close()
os.chdir("../../")

## 생성 모델

In [None]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.cuda.manual_seed_all(seed)
    deterministic = True

seed_everything(42) # Seed 고정

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cpu


In [None]:
# 데이터 전처리
def merge_data(input_list,target_list,col_list,drop_feature_list):
  data_list = []
  label_list = []

  for input_path, target_path in tqdm(zip(input_list,target_list)):
    input_df = pd.read_csv(input_path)
    target_df = pd.read_csv(target_path)

    # 날짜 컬럼 전처리
    if 'obs_time' in input_df.columns:
      input_df['obs_time'] = input_df['obs_time'].map(lambda x: int(x[:2]))

    # 변수 선택
    if len(drop_feature_list) > 0:
      input_df = input_df.drop(columns=drop_feature_list)
      input_df = input_df.fillna(0)
    else:
      input_df = input_df.fillna(0)
        
    input_length = int(len(input_df)/24)
    target_length = int(len(target_df))

    for idx in range(target_length):
        # 시간별 데이터 하루 단위로 자르기
        day_data = input_df[24*idx:24*(idx+1)]

        # 하루 단위 데이터 flatten
        time_series = day_data.values.flatten().tolist()  + [idx]
        data_list.append(time_series)

    for label in target_df["predicted_weight_g"]:
        label_list.append(label)
  
  # 컬럼명
  feature_list = [x for x in col_list if x not in drop_feature_list]
  total_col = []
  for hour in range(24):
    for col in feature_list:
      total_col.append(f'{hour}시_{col}')

  total_col.append('day')

  input = pd.DataFrame(data_list,columns=total_col)
  target = pd.DataFrame(label_list)

  # 특이값 수정
  input.loc[input['15시_내부온도관측치'] == 0, ['15시_내부온도관측치','15시_내부습도관측치']] = 27.46201254701998,51.560314940325554
  input.loc[input['1시_시간당분무량'] < 0,'1시_시간당분무량'] = 0
  input.loc[input['15시_시간당백색광량'] < 0 ,['15시_시간당백색광량','15시_시간당적색광량','15시_시간당청색광량']] = 18255.1900 , 1813.6608 , 3565.3540 
  return input, target

# target을 성장치로 수정(0일차의 target은 0일차의 상추 중량)
def modified_target(target_df):
  zero_start = []
  for i in range(0,len(target_df)):
    if i%28 == 0:
      zero_start.append(target_df[0][i])
      
    else:
      num = target_df[0][i] - target_df[0][i-1]
      zero_start.append(num)

  target_df[0] = zero_start
  return target_df

In [None]:
# 총 변수 리스트
col_list = ['DAT','obs_time','내부온도관측치', '내부습도관측치', 'co2관측치','ec관측치','시간당분무량',
       '일간누적분무량', '시간당백색광량', '일간누적백색광량', '시간당적색광량', '일간누적적색광량', '시간당청색광량',
       '일간누적청색광량', '시간당총광량', '일간누적총광량']

# 제거할 변수 선택
drop_feature_list = ['DAT','obs_time','일간누적분무량','일간누적백색광량', '일간누적적색광량' ,'일간누적청색광량', '일간누적총광량','시간당총광량']

# 사용할 변수 리스트
generator_feature_list = [x for x in col_list if x not in drop_feature_list]

# 데이터 합치고 파생변수 생성
train_X, train_y = merge_data(all_input_list,all_target_list,col_list,drop_feature_list)
test_X, test_y = merge_data(test_input_list,test_target_list,col_list,drop_feature_list)

print(generator_feature_list)
print(train_X.shape,len(train_y))
print(test_X.shape,len(test_y))

28it [00:01, 25.52it/s]
5it [00:00, 74.75it/s]


['내부온도관측치', '내부습도관측치', 'co2관측치', 'ec관측치', '시간당분무량', '시간당백색광량', '시간당적색광량', '시간당청색광량']
(784, 193) 784
(140, 193) 140


In [None]:
# 데이터 스케일링
g_scaler = StandardScaler()
g_scaler.fit(train_X.iloc[:,:-1])
df_input = pd.DataFrame(g_scaler.transform(train_X.iloc[:,:-1]),columns=train_X.columns[:-1])
df_input = pd.concat([df_input,pd.get_dummies(train_X['day'],drop_first=True)],axis=1)
df_target = modified_target(train_y)

In [None]:
cfg = ({'batch_size': 16,
        'epoch' : 50,
        'lr' : 5e-4
        })

In [None]:
class CustomDataset(Dataset):
    def __init__(self, df_input, df_target, infer_mode):
        self.df_input = df_input
        self.df_target = df_target
        self.infer_mode = infer_mode
        
        self.data_list = []
        self.label_list = []
        print('Data Pre-processing..')
        for i in tqdm(range(len(self.df_input))):
          tmp_input = self.df_input.iloc[i].values
          tmp_target = self.df_target.iloc[i].values
          self.data_list.append(tmp_input)
          self.label_list.append(tmp_target)
        
        self.data = torch.Tensor(self.data_list)
        self.label = torch.Tensor(self.label_list)
        print('Done.')
              
    def __getitem__(self, index):
        data = self.data[index]
        label = self.label[index]
        if self.infer_mode == False:
            return data, label
        else:
            return data
        
    def __len__(self):
        return len(self.data_list)

In [None]:
train_dataset = CustomDataset(df_input, df_target, False)
train_loader = DataLoader(train_dataset, batch_size = cfg['batch_size'], shuffle=True, num_workers=6)

Data Pre-processing..


100%|██████████| 784/784 [00:00<00:00, 3451.93it/s]

Done.





In [None]:
class CVAE(nn.Module):
    def __init__(self, x_dim, conv_dim, h_dim1, h_dim2, z_dim, c_dim):
        super(CVAE, self).__init__()
        self.conv_dim = conv_dim
        self.conv = nn.Sequential(nn.Conv1d(1,conv_dim,kernel_size=x_dim//24,stride=x_dim//24),
                                  nn.ReLU())
        self.fc1 = nn.Linear(conv_dim*24 + c_dim, h_dim1)
        self.fc2 = nn.Linear(h_dim1, h_dim2)
        self.mu = nn.Linear(h_dim2, z_dim)
        self.log_var = nn.Linear(h_dim2, z_dim)

        self.fc3 = nn.Linear(z_dim + c_dim, h_dim2)
        self.fc4 = nn.Linear(h_dim2, h_dim1)
        self.fc5 = nn.Linear(h_dim1, conv_dim*24)
        self.deconv = nn.Sequential(nn.ReLU(),
                                  nn.ConvTranspose1d(conv_dim,1,kernel_size=x_dim//24,stride=x_dim//24))
    
    def encoder(self, x, c):
        x = self.conv(torch.unsqueeze(x,1)).flatten(1)
        concat_input = torch.cat([x, c], 1)
        h = F.relu(self.fc1(concat_input))
        h = F.relu(self.fc2(h))
        return self.mu(h), self.log_var(h)
    
    def sampling(self, mu, log_var):
        std = torch.exp(0.5*log_var)
        eps = torch.randn_like(std)
        return eps.mul(std).add(mu)
    
    def decoder(self, z, c):
        concat_input = torch.cat([z, c], 1)
        h = F.relu(self.fc3(concat_input))
        h = F.relu(self.fc4(h))
        h = F.relu(self.fc5(h))
        h = h.reshape(-1,self.conv_dim,24)
        return self.deconv(h).flatten(1)
    
    def forward(self, x, c):
        mu, log_var = self.encoder(x, c)
        z = self.sampling(mu, log_var)
        return self.decoder(z, c), mu, log_var

# Loss 정의
class CVAE_Loss(nn.MSELoss):
    def __init__(self, dim=784, alpha=1, beta=1):
        super(CVAE_Loss,self).__init__()
        self.dim = dim
        self.alpha = alpha
        self.beta = beta
        
    def forward(self, recon_x, x, mu, log_var,c):
        BCE = F.mse_loss(recon_x, x.view(-1, self.dim), reduction='mean')
        KLD = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
        value_loss = get_value_loss(recon_x,x,c,self.beta)
        return self.alpha*BCE + value_loss

# 생성한 데이터를 예측 모델에 넣기 위한 전처리
def convert_for_predict(output,dat):
  output = pd.DataFrame(output)
  output.astype('float')
  output = g_scaler.inverse_transform(output)
  output[output<0] = 0

  convert_output = output[:,[i for i in range(8)]]
  new1 = output[:,[0]]*output[:,[1]]
  new2 = output[:,[1]]*output[:,[2]]
  new3 = output[:,[2]]*output[:,[0]]
  new4 = output[:,[0]]*output[:,[1]]*output[:,[2]]
  light_sum = np.array([0])
  for c in range(5,8):
    light_sum = light_sum + output[:,c]
  light_all = np.array([1 if x>0 else 0 for x in light_sum]).reshape(-1,1)
  convert_output = np.concatenate([convert_output,new1,new2,new3,new4,light_all],axis=1)

  for i in range(1,24):
    tmp_output = output[:,[j for j in range(i*8,i*8+8)]]
    new1 = output[:,[i*8]]*output[:,[i*8+1]]
    new2 = output[:,[i*8+1]]*output[:,[i*8+2]]
    new3 = output[:,[i*8+2]]*output[:,[i*8]]
    new4 = output[:,[i*8]]*output[:,[i*8+1]]*output[:,[i*8+2]]
    light_sum = np.array([0])
    for c in range(i*8+5,i*8+8):
      light_sum = light_sum + output[:,c]
    light_all = np.array([1 if x>0 else 0 for x in light_sum]).reshape(-1,1)
    convert_output = np.concatenate([convert_output,tmp_output,new1,new2,new3,new4,light_all],axis=1)

  convert_output = np.concatenate([convert_output,dat.reshape(-1,1)],axis=1)
  convert_output = p_scaler.transform(convert_output)
  df = pd.DataFrame(convert_output,columns=predict_feature_list)
  return df

# 실제 데이터와 생성 데이터의 예측 중량 차이
def get_value_loss(recon_x,x,c,beta=1):
  encoded_dat = c.split(27,dim=1)[0].to('cpu').squeeze()

  dat = np.zeros(len(encoded_dat))
  decoded_dat = np.array((dat==1).nonzero())
  if decoded_dat:
    for d in decoded_dat:
      dat[d[0]] = d[1]
  recon_x_df = convert_for_predict(recon_x,dat)
  x_df = convert_for_predict(x,dat)

  recon_weight = torch.Tensor(xgb.predict(recon_x_df.values)).to(device)
  weight = torch.Tensor(xgb.predict(x_df.values)).to(device)

  result = F.mse_loss(recon_weight,weight,reduction='mean')*beta
  return torch.Tensor(result).to(device)

In [None]:
# Model, Loss , Optimizer 정의
seed_everything(42)
model = CVAE(x_dim=192, conv_dim=32, h_dim1= 786, h_dim2=256,z_dim=2, c_dim=28).to(device)
criterion = CVAE_Loss(dim=192,alpha=2, beta=0.5)
optimizer = optim.Adam(model.parameters(),lr=cfg['lr'])

## Train!

In [None]:
import time
start_time = time.time()

best_loss = 1e10
for epoch in range(cfg['epoch']):
    model.train()
    train_loss = 0
    for data,label in tqdm(train_loader):
        split_data = data.split(192,dim=1)
        x = split_data[0].to(device)
        c = torch.cat([split_data[1],label.view(len(x),-1)],dim=1).to(device)
        optimizer.zero_grad()
        recon_x, mu, log_var = model(x,c)
        loss = criterion(recon_x, x, mu, log_var,c)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
    print(f'epoch : {epoch} , train_loss : {train_loss/len(train_dataset)}')
    if best_loss > train_loss/len(train_dataset):
      best_model = model

print(time.time()-start_time)

100%|██████████| 49/49 [00:04<00:00, 12.22it/s]


epoch : 0 , train_loss : 0.29982769550109395


100%|██████████| 49/49 [00:04<00:00, 10.61it/s]


epoch : 1 , train_loss : 0.23453743482122616


100%|██████████| 49/49 [00:04<00:00, 11.16it/s]


epoch : 2 , train_loss : 0.18333615393054728


100%|██████████| 49/49 [00:05<00:00,  9.28it/s]


epoch : 3 , train_loss : 0.16522066508020675


100%|██████████| 49/49 [00:06<00:00,  7.67it/s]


epoch : 4 , train_loss : 0.15265200773672183


100%|██████████| 49/49 [00:06<00:00,  7.42it/s]


epoch : 5 , train_loss : 0.14982504823378154


100%|██████████| 49/49 [00:06<00:00,  8.11it/s]


epoch : 6 , train_loss : 0.14788862743547984


100%|██████████| 49/49 [00:04<00:00, 10.90it/s]


epoch : 7 , train_loss : 0.14225164724856007


100%|██████████| 49/49 [00:05<00:00,  9.60it/s]


epoch : 8 , train_loss : 0.14926687947341374


100%|██████████| 49/49 [00:04<00:00, 11.61it/s]


epoch : 9 , train_loss : 0.13226851068285048


100%|██████████| 49/49 [00:04<00:00, 10.27it/s]


epoch : 10 , train_loss : 0.13138879485884492


100%|██████████| 49/49 [00:05<00:00,  9.71it/s]


epoch : 11 , train_loss : 0.14010607352366253


100%|██████████| 49/49 [00:08<00:00,  5.63it/s]


epoch : 12 , train_loss : 0.12660767115196403


100%|██████████| 49/49 [00:07<00:00,  6.63it/s]


epoch : 13 , train_loss : 0.11960630727057554


100%|██████████| 49/49 [00:06<00:00,  7.93it/s]


epoch : 14 , train_loss : 0.12539229146680053


100%|██████████| 49/49 [00:05<00:00,  8.99it/s]


epoch : 15 , train_loss : 0.11613410688480552


100%|██████████| 49/49 [00:06<00:00,  7.07it/s]


epoch : 16 , train_loss : 0.11747000670554686


100%|██████████| 49/49 [00:06<00:00,  7.80it/s]


epoch : 17 , train_loss : 0.11545727911348246


100%|██████████| 49/49 [00:07<00:00,  6.97it/s]


epoch : 18 , train_loss : 0.11513917368589616


100%|██████████| 49/49 [00:05<00:00,  9.41it/s]


epoch : 19 , train_loss : 0.10683318691290154


100%|██████████| 49/49 [00:05<00:00,  8.95it/s]


epoch : 20 , train_loss : 0.10974308437838846


100%|██████████| 49/49 [00:05<00:00,  8.20it/s]


epoch : 21 , train_loss : 0.10476981027393925


100%|██████████| 49/49 [00:04<00:00, 10.92it/s]


epoch : 22 , train_loss : 0.0984559132888609


100%|██████████| 49/49 [00:04<00:00, 11.25it/s]


epoch : 23 , train_loss : 0.09519154099481446


100%|██████████| 49/49 [00:05<00:00,  8.79it/s]


epoch : 24 , train_loss : 0.09861432251577475


100%|██████████| 49/49 [00:06<00:00,  8.14it/s]


epoch : 25 , train_loss : 0.09858869692804861


100%|██████████| 49/49 [00:04<00:00, 10.73it/s]


epoch : 26 , train_loss : 0.09824166310076811


100%|██████████| 49/49 [00:04<00:00, 11.25it/s]


epoch : 27 , train_loss : 0.0910570918753439


100%|██████████| 49/49 [00:05<00:00,  8.77it/s]


epoch : 28 , train_loss : 0.0934499092400074


100%|██████████| 49/49 [00:06<00:00,  7.42it/s]


epoch : 29 , train_loss : 0.09636361421827151


100%|██████████| 49/49 [00:07<00:00,  6.96it/s]


epoch : 30 , train_loss : 0.10007303770707578


100%|██████████| 49/49 [00:07<00:00,  6.78it/s]


epoch : 31 , train_loss : 0.09169071684686506


100%|██████████| 49/49 [00:07<00:00,  6.21it/s]


epoch : 32 , train_loss : 0.08996386354675098


100%|██████████| 49/49 [00:05<00:00,  9.47it/s]


epoch : 33 , train_loss : 0.08701835724772239


100%|██████████| 49/49 [00:06<00:00,  7.51it/s]


epoch : 34 , train_loss : 0.09300367275671083


100%|██████████| 49/49 [00:04<00:00, 10.87it/s]


epoch : 35 , train_loss : 0.08824373530794163


100%|██████████| 49/49 [00:04<00:00, 11.30it/s]


epoch : 36 , train_loss : 0.09486965390340406


100%|██████████| 49/49 [00:05<00:00,  9.65it/s]


epoch : 37 , train_loss : 0.08346935962231791


100%|██████████| 49/49 [00:04<00:00, 11.01it/s]


epoch : 38 , train_loss : 0.08890176089290454


100%|██████████| 49/49 [00:04<00:00, 10.89it/s]


epoch : 39 , train_loss : 0.08110262924919323


100%|██████████| 49/49 [00:04<00:00, 10.80it/s]


epoch : 40 , train_loss : 0.08173884116873449


100%|██████████| 49/49 [00:04<00:00, 10.88it/s]


epoch : 41 , train_loss : 0.08145101847393173


100%|██████████| 49/49 [00:04<00:00, 11.10it/s]


epoch : 42 , train_loss : 0.08073602631992223


100%|██████████| 49/49 [00:04<00:00, 10.90it/s]


epoch : 43 , train_loss : 0.07805253610927232


100%|██████████| 49/49 [00:04<00:00, 10.83it/s]


epoch : 44 , train_loss : 0.07334157516609649


100%|██████████| 49/49 [00:04<00:00, 10.89it/s]


epoch : 45 , train_loss : 0.0781900455452958


100%|██████████| 49/49 [00:04<00:00, 10.98it/s]


epoch : 46 , train_loss : 0.07700831424064782


100%|██████████| 49/49 [00:04<00:00, 10.25it/s]


epoch : 47 , train_loss : 0.073846698947707


100%|██████████| 49/49 [00:06<00:00,  7.74it/s]


epoch : 48 , train_loss : 0.07369867522193461


100%|██████████| 49/49 [00:04<00:00, 10.30it/s]

epoch : 49 , train_loss : 0.07315665696348463
271.88793659210205





In [None]:
# 생성 모델 저장
# torch.save(best_model, './generate_model1.pt')
torch.save(best_model.state_dict(), './best_model.pt')

## 생장환경 생성

In [None]:
# 생성 모델 불러오기
# model = torch.load('./generate_model.pt')
model.load_state_dict(torch.load( './best_model.pt'))

<All keys matched successfully>

In [None]:
# 생장환경 생성 
model.eval()
data_mu, data_var = 0, 0
for data,label in tqdm(train_loader):
    split_data = data.split(192,dim=1)
    x = split_data[0].to(device)
    c = torch.cat([split_data[1],label.view(len(x),-1)],dim=1).to(device)
    sample_mu, sample_log_var = model.encoder(x,c)
    data_mu += torch.sum(sample_mu,0)
    data_var += torch.sum(torch.exp(sample_log_var),0)

data_mu /= len(train_dataset)
data_var /= len(train_dataset)
data_log_var = torch.log(data_var)
print(data_mu,data_log_var)

100%|██████████| 49/49 [00:00<00:00, 114.06it/s]

tensor([ 1.7718, -3.2541], grad_fn=<DivBackward0>) tensor([-8.9676, -8.8576], grad_fn=<LogBackward0>)





In [None]:
data_mu = data_mu.repeat(28,1)
data_log_var = data_log_var.repeat(28,1)

In [None]:
def input_weight(x,alpha = 0.1,target_day=28):
  dat = torch.zeros(28,27)
  for i in range(27):
    dat[i+1][i] = 1
  weights = [(1/alpha)*((x*alpha)**(i/target_day)) for i in range(1,target_day+1)]
  growth = torch.zeros(28,1)
  growth[0][0] = weights[0]
  for i in range(target_day-1):
    growth[i+1][0]=(weights[i+1] - weights[i])
  
  target = torch.cat([dat,growth],dim=1)
  return target

def recovery(pred):
  result = pred[0]
  for i in range(1,28):
    result += pred[i]
  return result

In [None]:
def get_best_result(model,target_weight,target_day=28,reps=100, alpha=0.1):
  model.eval()
  best_output = 0
  best_weight = 0
  gap = 10000
  for _ in range(reps):
    with torch.no_grad():
      z = model.sampling(data_mu,data_log_var)
      c = input_weight(target_weight,alpha=alpha,target_day=target_day).to(device)
      output = model.decoder(z,c)
      dat = np.array([i for i in range(28)])
      converted_output = convert_for_predict(output,dat)

    pred = xgb.predict(converted_output.values)
    weight = recovery(pred)
    if gap > abs(target_weight-weight):
      gap = abs(target_weight-weight)
      best_weight = weight
      best_output = output
      
  # best_output[best_output<0] = 0
  return best_output, best_weight

In [None]:
seed_everything(42)
# 목표 중량을 입력하면 생장 환경이 생성됨
output,weight = get_best_result(model=model, target_weight=124, target_day=28,reps=50, alpha=0.1)
weight

122.04921

In [None]:
# 생성 환경에 따른 상추 무게 예측
dat = np.array([i for i in range(28)])
converted_output = convert_for_predict(output,dat)
pred = xgb.predict(converted_output.values)
result_target = pd.DataFrame(revert_target(pred),columns=['predicted_weight_g'])

In [None]:
pd.set_option('display.max_rows',None)

output = g_scaler.inverse_transform(output.cpu())
output[output < 0] = 0
water_list = []
white_list = []
red_list = []
blue_list = []
time_list = []
d_list = []
for d in range(28):
  tmp_water, tmp_white, tmp_red, tmp_blue, tmp_time, tmp_d = [output[d,4]],[output[d,5]],[output[d,6]],[output[d,7]],[0],[d]
  for t in range(1,24):
    tmp_water.append(tmp_water[t-1] + output[d,4+8*t])
    tmp_white.append(tmp_white[t-1] + output[d,5+8*t])
    tmp_red.append(tmp_red[t-1] + output[d,6+8*t])
    tmp_blue.append(tmp_blue[t-1] + output[d,7+8*t])
    tmp_time.append(t)
    tmp_d.append(d)
  water_list.append(tmp_water)
  white_list.append(tmp_white)
  red_list.append(tmp_red)
  blue_list.append(tmp_blue)
  time_list.append(tmp_time)
  d_list.append(tmp_d)


water_list = np.array(water_list).reshape(-1,1)
white_list = np.array(white_list).reshape(-1,1)
red_list = np.array(red_list).reshape(-1,1)
blue_list = np.array(blue_list).reshape(-1,1)
time_list = np.array(time_list).reshape(-1,1)
d_list = np.array(d_list).reshape(-1,1)

output = output.reshape(-1,8)
df = np.concatenate((d_list,time_list,output[:,:5],water_list,output[:,5:6],white_list,output[:,6:7],red_list,output[:,7:8],blue_list,output[:,5:6]+output[:,6:7]+output[:,7:8],white_list+red_list+blue_list),axis=1)
df = pd.DataFrame(df,columns=['DAT','obs_time','내부온도관측치', '내부습도관측치', 'co2관측치','ec관측치','시간당분무량',
       '일간누적분무량', '시간당백색광량', '일간누적백색광량', '시간당적색광량', '일간누적적색광량', '시간당청색광량',
       '일간누적청색광량', '시간당총광량', '일간누적총광량'])

In [None]:
raw_df = pd.read_csv('./data/train_input/CASE_17.csv')
def convert_time(x):
  return int(x[:2])

raw_df['obs_time'] = raw_df['obs_time'].apply(convert_time)

draw_cols = ['내부온도관측치', '내부습도관측치', 'co2관측치','ec관측치','시간당분무량',
       '시간당백색광량' , '시간당적색광량', '시간당청색광량',
        '시간당총광량']

fig,axes = plt.subplots(5,2,figsize=(10,20))
for i,c in enumerate(draw_cols):
  sns.lineplot(data=df,x='obs_time',y=c,ax=axes[i//2][i%2],label='예측값').set(title=f'{c}')
for i,c in enumerate(draw_cols):
  sns.lineplot(data=raw_df,x='obs_time',y=c,ax=axes[i//2][i%2],label='실제값')
plt.legend()
plt.tight_layout()

In [None]:
df.to_csv('./result_input_124.csv',index=False)
result_target.to_csv('./result_target_124.csv',index=False)