In [1]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd

import h2o

from h2o.automl import H2OAutoML

In [2]:
# Load the treated training set. The columns `process_end_time` and
# `final_mes_time` are removed up front so they never reach the feature
# expansion or the model (every remaining column except OV is used as a feature).
train_df = pd.read_csv('../data/treated/learn.csv').drop(
    columns=['process_end_time', 'final_mes_time']
)
train_df.head()

Unnamed: 0,OV,X1,X2,X3,X4,X5,X6,X7,X8,X9,...,X74,X75,X76,X77,X78,X79,X80,X81,X82,X83
0,111.2886,22.98,1.37,797.2,0.67,0.15,0.08,67.15,9.86,37.23,...,0.1,0.04,6.4,0.58,0.0,4.1,0.0,0.01,0.16,0.04
1,201.5617,22.45,9.4,803.8,0.68,0.15,0.08,56.11,5.27,33.98,...,0.1,0.04,6.4,0.58,0.0,4.1,0.0,0.01,0.16,0.04
2,111.941,22.03,1.14,594.52,0.64,0.14,0.08,51.57,6.15,10.44,...,0.1,0.04,6.4,0.58,0.0,4.1,0.0,0.01,0.16,0.04
3,182.3914,22.98,1.37,797.2,0.67,0.15,0.08,67.15,9.86,37.23,...,0.1,0.04,6.4,0.58,0.0,4.1,0.0,0.01,0.16,0.04
4,196.2703,22.98,1.37,797.2,0.67,0.15,0.08,67.15,9.86,37.23,...,0.1,0.04,6.4,0.58,0.0,4.1,0.0,0.01,0.16,0.04


In [3]:
# Load the treated test set and remove the same two columns that are removed
# from the training set, so both frames expose the same feature columns.
test_df = pd.read_csv('../data/treated/test.csv').drop(
    columns=['process_end_time', 'final_mes_time']
)
test_df.head()


Unnamed: 0,OV,X1,X2,X3,X4,X5,X6,X7,X8,X9,...,X74,X75,X76,X77,X78,X79,X80,X81,X82,X83
0,130.1202,23.62,9.35,705.91,0.64,0.14,0.08,127.48,4.25,7.3,...,0.03,0.26,6.5,0.2,0.0,2.7,0.02,0.01,0.25,0.04
1,115.6764,23.62,9.35,705.91,0.64,0.14,0.08,127.48,4.25,7.3,...,0.03,0.26,6.5,0.2,0.0,2.7,0.02,0.01,0.25,0.04
2,104.8819,23.62,9.35,705.91,0.64,0.14,0.08,127.48,4.25,7.3,...,0.03,0.26,6.5,0.2,0.0,2.7,0.02,0.01,0.25,0.04
3,108.7957,24.35,4.92,635.52,0.64,0.14,0.09,78.09,3.22,6.29,...,0.03,0.26,6.5,0.2,0.0,2.7,0.02,0.01,0.25,0.04
4,119.7415,23.62,9.35,705.91,0.64,0.14,0.08,127.48,4.25,7.3,...,0.03,0.26,6.5,0.2,0.0,2.7,0.02,0.01,0.25,0.04


In [4]:
from sklearn.preprocessing import PolynomialFeatures

def fit_poly(df, degree=2, include_bias=False, interaction_only=False, target='OV'):
    """Fit a polynomial feature expansion on ``df`` and return the expanded frame.

    Every column of ``df`` except ``target`` is treated as an input feature.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the feature columns and the ``target`` column.
    degree, include_bias, interaction_only
        Forwarded unchanged to ``PolynomialFeatures``.
    target : str
        Name of the column that is excluded from the expansion and copied
        onto the result as-is.

    Returns
    -------
    (pandas.DataFrame, PolynomialFeatures)
        The expanded features (same index as ``df``) with the ``target``
        column attached, and the fitted transformer so the identical
        expansion can be applied to other frames.
    """
    features = df.drop(columns=[target])
    poly = PolynomialFeatures(
        degree=degree,
        include_bias=include_bias,
        interaction_only=interaction_only,
    )
    expanded = pd.DataFrame(
        poly.fit_transform(features),
        columns=poly.get_feature_names_out(features.columns),
        index=df.index,
    )
    # Re-attach the untouched target; assignment aligns on the shared index.
    expanded[target] = df[target]
    return expanded, poly

def transform_poly(df, poly, target='OV'):
    """Apply an already-fitted polynomial expansion to ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the feature columns and the ``target`` column.
    poly
        Fitted transformer exposing ``transform`` and
        ``get_feature_names_out`` (e.g. the object returned by ``fit_poly``).
    target : str
        Name of the column that is excluded from the expansion and copied
        onto the result as-is.

    Returns
    -------
    pandas.DataFrame
        The expanded features (same index as ``df``) with the ``target``
        column attached.
    """
    features = df.drop(columns=[target])
    expanded = pd.DataFrame(
        poly.transform(features),
        columns=poly.get_feature_names_out(features.columns),
        index=df.index,
    )
    # Re-attach the untouched target; assignment aligns on the shared index.
    expanded[target] = df[target]
    return expanded

In [5]:
# Fit the polynomial expansion on the training frame only, then reuse the same
# fitted transformer on the test frame so both go through the same expansion.
train_df_poly, poly = fit_poly(train_df)
test_df_poly = transform_poly(test_df, poly)

In [6]:
# Show the expanded training frame (features first, OV as the last column).
train_df_poly

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,...,X80 X81,X80 X82,X80 X83,X81^2,X81 X82,X81 X83,X82^2,X82 X83,X83^2,OV
0,22.98,1.37,797.20,0.67,0.15,0.08,67.15,9.86,37.23,0.94,...,0.0,0.0,0.0,0.0001,0.0016,0.0004,0.0256,0.0064,0.0016,111.2886
1,22.45,9.40,803.80,0.68,0.15,0.08,56.11,5.27,33.98,0.96,...,0.0,0.0,0.0,0.0001,0.0016,0.0004,0.0256,0.0064,0.0016,201.5617
2,22.03,1.14,594.52,0.64,0.14,0.08,51.57,6.15,10.44,0.94,...,0.0,0.0,0.0,0.0001,0.0016,0.0004,0.0256,0.0064,0.0016,111.9410
3,22.98,1.37,797.20,0.67,0.15,0.08,67.15,9.86,37.23,0.94,...,0.0,0.0,0.0,0.0001,0.0016,0.0004,0.0256,0.0064,0.0016,182.3914
4,22.98,1.37,797.20,0.67,0.15,0.08,67.15,9.86,37.23,0.94,...,0.0,0.0,0.0,0.0001,0.0016,0.0004,0.0256,0.0064,0.0016,196.2703
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1150,23.12,2.79,982.97,0.89,0.16,0.08,419.87,11.25,26.97,0.91,...,0.0,0.0,0.0,0.0000,0.0000,0.0000,0.0784,0.0000,0.0000,207.9805
1151,23.12,2.79,982.97,0.89,0.16,0.08,419.87,11.25,26.97,0.91,...,0.0,0.0,0.0,0.0000,0.0000,0.0000,0.0784,0.0000,0.0000,181.5115
1152,23.12,2.79,982.97,0.89,0.16,0.08,419.87,11.25,26.97,0.91,...,0.0,0.0,0.0,0.0000,0.0000,0.0000,0.0784,0.0000,0.0000,157.0897
1153,23.12,2.79,982.97,0.89,0.16,0.08,419.87,11.25,26.97,0.91,...,0.0,0.0,0.0,0.0000,0.0000,0.0000,0.0784,0.0000,0.0000,173.6035


In [7]:
# Show the expanded test frame (features first, OV as the last column).
test_df_poly

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,...,X80 X81,X80 X82,X80 X83,X81^2,X81 X82,X81 X83,X82^2,X82 X83,X83^2,OV
0,23.62,9.35,705.91,0.64,0.14,0.08,127.48,4.25,7.30,0.93,...,0.0002,0.005,0.0008,0.0001,0.0025,0.0004,0.0625,0.0100,0.0016,130.12020
1,23.62,9.35,705.91,0.64,0.14,0.08,127.48,4.25,7.30,0.93,...,0.0002,0.005,0.0008,0.0001,0.0025,0.0004,0.0625,0.0100,0.0016,115.67640
2,23.62,9.35,705.91,0.64,0.14,0.08,127.48,4.25,7.30,0.93,...,0.0002,0.005,0.0008,0.0001,0.0025,0.0004,0.0625,0.0100,0.0016,104.88190
3,24.35,4.92,635.52,0.64,0.14,0.09,78.09,3.22,6.29,0.92,...,0.0002,0.005,0.0008,0.0001,0.0025,0.0004,0.0625,0.0100,0.0016,108.79570
4,23.62,9.35,705.91,0.64,0.14,0.08,127.48,4.25,7.30,0.93,...,0.0002,0.005,0.0008,0.0001,0.0025,0.0004,0.0625,0.0100,0.0016,119.74150
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,25.22,14.40,397.21,0.59,0.14,0.08,38.31,2.17,53.05,0.83,...,0.0000,0.000,0.0000,0.0004,0.0098,0.0006,0.2401,0.0147,0.0009,74.94453
496,24.94,56.62,655.34,0.62,0.14,0.07,25.69,2.19,14.11,0.89,...,0.0000,0.000,0.0000,0.0004,0.0098,0.0006,0.2401,0.0147,0.0009,59.39213
497,25.22,14.40,397.21,0.59,0.14,0.08,38.31,2.17,53.05,0.83,...,0.0000,0.000,0.0000,0.0004,0.0098,0.0006,0.2401,0.0147,0.0009,70.32605
498,25.22,14.40,397.21,0.59,0.14,0.08,38.31,2.17,53.05,0.83,...,0.0000,0.000,0.0000,0.0004,0.0098,0.0006,0.2401,0.0147,0.0009,77.89170


In [8]:
import sys
import xgboost
# NOTE(review): neither `sys` nor `xgboost` is referenced in this cell. Importing
# the Python xgboost package does not enable H2O's own XGBoost backend: the
# AutoML log recorded for this cell still reports
# "XGBoost is not available; skipping it."
h2o.init()

# Upload the expanded pandas frames to the H2O cluster.
train = h2o.H2OFrame(train_df_poly)
test = h2o.H2OFrame(test_df_poly)

# Run AutoML (capped at 60 seconds here). Every column except "OV" is used as
# a predictor because no explicit `x` list is passed.
# NOTE(review): with a wall-clock budget the set of models that finish can
# differ between runs even with a fixed seed; consider `max_models` if exact
# reproducibility matters -- confirm against the H2O AutoML documentation.
aml = H2OAutoML(max_runtime_secs=60, seed=1)
aml.train(y="OV", training_frame=train)

# Take the best model found by AutoML (the leader) and evaluate it on the test frame.
leader = aml.leader
perf = leader.model_performance(test_data=test)
print(perf)

# Inspect what the leader actually is: first its Python type (model family).
print(type(leader))

# Model summary with the details of the leader. The value has to be printed
# explicitly: a bare `leader.summary()` in the middle of a cell is evaluated
# and then discarded, so nothing would be shown.
print(leader.summary())

# Quick identification of the model: algorithm name and model id.
print(leader.algo)   # e.g. 'gbm', 'glm', 'deeplearning'
print(leader.model_id)



Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
  Java Version: openjdk version "25.0.1" 2025-10-21; OpenJDK Runtime Environment Homebrew (build 25.0.1); OpenJDK 64-Bit Server VM Homebrew (build 25.0.1, mixed mode, sharing)
  Starting server from /Users/yutotakagi/Develop/ass-production-management2/.venv/lib/python3.12/site-packages/h2o/backend/bin/h2o.jar
  Ice root: /var/folders/3b/3hxgpzc57b70j4b1d7my6wfc0000gn/T/tmpbe847t_4
  JVM stdout: /var/folders/3b/3hxgpzc57b70j4b1d7my6wfc0000gn/T/tmpbe847t_4/h2o_yutotakagi_started_from_python.out
  JVM stderr: /var/folders/3b/3hxgpzc57b70j4b1d7my6wfc0000gn/T/tmpbe847t_4/h2o_yutotakagi_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.


0,1
H2O_cluster_uptime:,01 secs
H2O_cluster_timezone:,Asia/Tokyo
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.9
H2O_cluster_version_age:,1 month and 16 days
H2O_cluster_name:,H2O_from_python_yutotakagi_gynn99
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,5.984 Gb
H2O_cluster_total_cores:,10
H2O_cluster_allowed_cores:,10


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |
14:44:56.261: AutoML: XGBoost is not available; skipping it.
14:44:56.325: _train param, Dropping bad and constant columns: [X23 X48, X23 X47, X23^2, X23 X49, X23 X44, X23 X43, X10 X23, X23 X46, X9 X23, X23 X45, X19 X23, X8 X23, X23, X23 X40, X7 X23, X23 X83, X23 X42, X23 X41, X23 X80, X18 X23, X23 X82, X23 X81, X23 X59, X23 X58, X23 X55, X20 X23, X23 X54, X23 X57, X23 X56, X2 X23, X21 X23, X67 X80, X17 X23, X23 X51, X23 X50, X23 X53, X23 X52, X16 X23, X1 X23, X23 X26, X23 X25, X23 X69, X4 X23, X23 X28, X23 X27, X23 X66, X23 X65, X23 X24, X23 X68, X23 X67, X15 X23, X78 X80, X23 X29, X67 X78, X13 X23, X65 X67, X22 X23, X23 X62, X23 X61, X14 X23, X23 X64, X23 X63, X3 X23, X23 X60, X12 X23, X23 X37, X23 X36, X23 X39, X23 X38, X6 X23, X23 X33, X23 X77, X23 X32, X23 X76, X23 X35, X23 X

In [9]:
# Distribution of the target OV in the train and test H2O frames.
# NOTE(review): in the recorded output the two differ noticeably
# (train: mean ~156.0, sigma ~54.7; test: mean ~102.3, sigma ~67.2).
train["OV"].summary()
test["OV"].summary()

  train["OV"].summary()


Unnamed: 0,OV
type,real
mins,52.56268
mean,156.03368903030298
maxs,418.4056
sigma,54.67970359978811
zeros,0
missing,0
0,111.2886
1,201.5617
2,111.941


Unnamed: 0,OV
type,real
mins,31.38486
mean,102.31979333999998
maxs,395.7734
sigma,67.2478055882895
zeros,0
missing,0
0,130.1202
1,115.6764
2,104.8819


In [10]:
# Evaluate the AutoML leader on the test frame and print the regression metrics.
# NOTE(review): the recorded output reports R^2 ~ -86.8 and RMSE ~ 629.5 on the
# test frame, i.e. far worse than predicting a constant mean -- the model does
# not carry over to the test data as trained here.
perf = leader.model_performance(test_data=test)
print(perf)

ModelMetricsRegressionGLM: stackedensemble
** Reported on test data. **

MSE: 396302.127358176
RMSE: 629.5253190763466
MAE: 275.1083013449652
RMSLE: NaN
Mean Residual Deviance: 396302.127358176
R^2: -86.8091210765179
Null degrees of freedom: 499
Residual degrees of freedom: 498
Null deviance: 3915714.86921709
Residual deviance: 198151063.679088
AIC: 7869.904606811682


In [11]:
# Show the leader model's rich representation.
leader

key,value
Stacking strategy,blending
Number of base models (used / total),1/2
# GBM base models (used / total),0/1
# GLM base models (used / total),1/1
Metalearner algorithm,GLM
Metalearner fold assignment scheme,AUTO
Metalearner nfolds,0
Metalearner fold_column,
Custom metalearner hyperparameters,


In [12]:
# Inspect what the leader model actually is, using type() and summary().
# First its Python type (model family).
print(type(leader))

# Model summary with the details of the leader. The value has to be printed
# explicitly: a bare `leader.summary()` in the middle of a cell is evaluated
# and then discarded, so nothing would be shown.
print(leader.summary())

# Quick identification of the model: algorithm name and model id.
print(leader.algo)   # e.g. 'gbm', 'glm', 'deeplearning'
print(leader.model_id)


<class 'h2o.estimators.stackedensemble.H2OStackedEnsembleEstimator'>
stackedensemble
StackedEnsemble_BestOfFamily_1_AutoML_1_20260110_144455
