In [5]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler

In [2]:
wl_data = pd.read_csv('./data_fin/가평_시간별_종합.csv',index_col=0)

In [3]:
# 'yyyy-mm-dd HH' 형식으로 이루어진 '시간' 컬럼의 6~9월 데이터 추출
wl_data['시간'] = pd.to_datetime(wl_data['시간'])
filtered_data = wl_data[(wl_data['시간'].dt.month >= 6) & (wl_data['시간'].dt.month <= 9)]

In [6]:
# 수위 컬럼에 1.1을 더하여 학습 데이터 준비
filtered_data['수위'] += 1.1

# 유량 데이터 추출
flows = filtered_data['유량']

# 유량 데이터 스케일링을 위한 스케일러 인스턴스 생성
flows_scaler = StandardScaler()

# 유량 데이터에 대해 스케일링을 적합하고 변환
flows_scaled = flows_scaler.fit_transform(flows.values.reshape(-1, 1))

# 스케일된 유량 데이터를 데이터프레임에 추가
filtered_data['유량_scaled'] = flows_scaled

# 유량 데이터와 관련된 특성들을 포함한 데이터 프레임을 준비
X = filtered_data[['수위', '강수량', '유량_scaled']]
y = filtered_data['유량']

# 학습 데이터와 테스트 데이터로 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 선형회귀 모델 인스턴스 생성
linear_regression_model = LinearRegression()

# 랜덤 포레스트 모델 인스턴스 생성
random_forest_model = RandomForestRegressor()

# 그라디언트 부스팅 모델 인스턴스 생성
gradient_boosting_model = GradientBoostingRegressor()

# 의사결정나무 모델 인스턴스 생성
decision_tree_model = DecisionTreeRegressor()

# 선형회귀의 하이퍼파라미터 조정과 모델 훈련
linear_regression_model.fit(X_train, y_train)
linear_regression_rmse = mean_squared_error(y_test, linear_regression_model.predict(X_test), squared=False)

# 랜덤 포레스트의 하이퍼파라미터 탐색
random_forest_param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [3, 4, 5]
}

random_forest_grid_search = GridSearchCV(random_forest_model, random_forest_param_grid, cv=5)
random_forest_grid_search.fit(X_train, y_train)

best_random_forest_model = random_forest_grid_search.best_estimator_
random_forest_rmse = mean_squared_error(y_test, best_random_forest_model.predict(X_test), squared=False)

# 그라디언트 부스팅의 하이퍼파라미터 탐색
gradient_boosting_param_grid = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.1, 0.05, 0.01],
    'max_depth': [3, 4, 5]
}

gradient_boosting_grid_search = GridSearchCV(gradient_boosting_model, gradient_boosting_param_grid, cv=5)
gradient_boosting_grid_search.fit(X_train, y_train)

best_gradient_boosting_model = gradient_boosting_grid_search.best_estimator_
gradient_boosting_rmse = mean_squared_error(y_test, best_gradient_boosting_model.predict(X_test), squared=False)

# 의사결정나무의 하이퍼파라미터 탐색
decision_tree_param_grid = {
    'max_depth': [3, 4, 5],
    'min_samples_split': [2, 5, 10]
}

decision_tree_grid_search = GridSearchCV(decision_tree_model, decision_tree_param_grid, cv=5)
decision_tree_grid_search.fit(X_train, y_train)

best_decision_tree_model = decision_tree_grid_search.best_estimator_
decision_tree_rmse = mean_squared_error(y_test, best_decision_tree_model.predict(X_test), squared=False)

# 각 모델의 RMSE 측정 및 최적 하이퍼파라미터 출력
print('Linear Regression RMSE:', linear_regression_rmse)
print('Best Random Forest:', random_forest_grid_search.best_params_)
print('Random Forest RMSE:', random_forest_rmse)
print('Best Gradient Boosting:', gradient_boosting_grid_search.best_params_)
print('Gradient Boosting RMSE:', gradient_boosting_rmse)
print('Best Decision Tree:', decision_tree_grid_search.best_params_)
print('Decision Tree RMSE:', decision_tree_rmse)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['수위'] += 1.1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['유량_scaled'] = flows_scaled


Linear Regression RMSE: 8.132529155984271e-14
Best Random Forest: {'max_depth': 5, 'n_estimators': 100}
Random Forest RMSE: 2.948655048965815
Best Gradient Boosting: {'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 200}
Gradient Boosting RMSE: 2.5110368230822466
Best Decision Tree: {'max_depth': 5, 'min_samples_split': 2}
Decision Tree RMSE: 4.034182000213849


In [7]:
# 모델 저장
def save_model(model, filename):
    with open(filename, 'wb') as file:
        pickle.dump(model, file)

save_model(best_gradient_boosting_model, 'gradient_boosting_model.pkl')

In [8]:
def load_model(filepath):
    with open(filepath, 'rb') as f:
        model = pickle.load(f)
    return model

def predict_flow(rainfall, water_level, velocity):
    # 입력 데이터를 특징 벡터로 변환
    features = np.array([[rainfall, water_level, velocity]])

    # 모델 로드
    model = load_model('gradient_boosting_model.pkl')

    # 유량 예측
    flow = model.predict(features)

    return flow

def check_risk_level(rainfall, water_level, velocity):
    # 기준 수위
    threshold_level = 1.1

    # 유량 예측
    flow = predict_flow(rainfall, water_level, velocity)

    # 강수량에 따른 위험도 계산
    if rainfall >= 10:
        rainfall_risk = 5
    else:
        rainfall_risk = 1

    # 수위에 따른 위험도 계산
    if threshold_level + water_level >= 1.3:
        water_level_risk = 5
    else:
        water_level_risk = 1

    # 유속에 따른 위험도 계산
    if velocity <= 0.1:
        velocity_risk = 1
    elif velocity <= 0.3:
        velocity_risk = 2
    elif velocity <= 0.5:
        velocity_risk = 3
    elif velocity <= 0.7:
        velocity_risk = 4
    else:
        velocity_risk = 5

    # 종합 위험도 계산
    total_risk = 0

    if rainfall_risk >= 4 or water_level_risk >= 4 or velocity_risk >= 4:
        total_risk = 5
    elif rainfall_risk >= 3 or water_level_risk >= 3 or velocity_risk >= 3:
        total_risk = 4
    elif rainfall_risk >= 2 or water_level_risk >= 2 or velocity_risk >= 2:
        total_risk = 3
    elif rainfall_risk >= 1 or water_level_risk >= 1 or velocity_risk >= 1:
        total_risk = 2
    else:
        total_risk = 1

    # 위험도를 텍스트로 표시
    if total_risk == 1:
        risk_level = '0'
    elif total_risk == 2:
        risk_level = '1'
    elif total_risk == 3:
        risk_level = '2'
    elif total_risk == 4:
        risk_level = '3'
    else:
        risk_level = '4'

    return flow, risk_level

# 강수량, 수위, 유속 입력 받기
rainfall = float(input("강수량을 입력하세요: "))
water_level = float(input("수위를 입력하세요: "))
velocity = float(input("유속을 입력하세요: "))

# 유량 예측 및 위험단계 확인
predicted_flow, risk_level = check_risk_level(rainfall, water_level, velocity)

print("Predicted Flow:", predicted_flow)
print("Risk Level:", risk_level)

강수량을 입력하세요: 1
수위를 입력하세요: 0.02
유속을 입력하세요: 0.2
Predicted Flow: [38.4649916]
Risk Level: 2


