In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

# 讀取資料
df = pd.read_csv('data/CA_Weather_Fire_Dataset_1984-2025.csv')

df.head(5)


Unnamed: 0,DATE,PRECIPITATION,MAX_TEMP,MIN_TEMP,AVG_WIND_SPEED,FIRE_START_DAY,YEAR,TEMP_RANGE,WIND_TEMP_RATIO,MONTH,SEASON,LAGGED_PRECIPITATION,LAGGED_AVG_WIND_SPEED,DAY_OF_YEAR
0,1984-01-01,0.0,79.0,51.0,4.7,False,1984,28.0,0.059494,1,Winter,0.0,4.7,1
1,1984-01-02,0.0,71.0,46.0,5.59,False,1984,25.0,0.078732,1,Winter,0.0,5.145,2
2,1984-01-03,0.0,70.0,47.0,5.37,False,1984,23.0,0.076714,1,Winter,0.0,5.22,3
3,1984-01-04,0.0,76.0,45.0,4.7,False,1984,31.0,0.061842,1,Winter,0.0,5.09,4
4,1984-01-05,0.0,74.0,49.0,5.14,False,1984,25.0,0.069459,1,Winter,0.0,5.1,5


In [2]:
df.describe()


Unnamed: 0,PRECIPITATION,MAX_TEMP,MIN_TEMP,AVG_WIND_SPEED,YEAR,TEMP_RANGE,WIND_TEMP_RATIO,MONTH,LAGGED_PRECIPITATION,LAGGED_AVG_WIND_SPEED,DAY_OF_YEAR
count,14987.0,14987.0,14987.0,14976.0,14988.0,14987.0,14976.0,14988.0,14988.0,14988.0,14988.0
mean,0.032315,70.534997,56.494095,7.435098,2004.016813,14.040902,0.107019,6.518281,0.226188,7.434198,182.992994
std,0.179544,7.263447,6.767685,2.129985,11.843342,5.995327,0.03563,3.451037,0.648705,1.387849,105.523627
min,0.0,50.0,33.0,1.79,1984.0,2.0,0.023553,1.0,0.0,3.227143,1.0
25%,0.0,65.0,51.0,6.04,1994.0,10.0,0.085238,4.0,0.0,6.518571,92.0
50%,0.0,70.0,57.0,7.16,2004.0,12.0,0.102222,7.0,0.0,7.478571,183.0
75%,0.0,75.0,62.0,8.5,2014.0,17.0,0.120462,10.0,0.06,8.278571,274.0
max,4.53,106.0,77.0,26.17,2025.0,41.0,0.459123,12.0,8.18,13.932857,366.0


| 欄位名稱               | 說明                                                                 |
|------------------------|----------------------------------------------------------------------|
| DATE                   | 當天的觀測日期                                                       |
| PRECIPITATION          | 每日降水量（英吋）                                                   |
| MAX_TEMP               | 每日最高氣溫（華氏）                                                 |
| MIN_TEMP               | 每日最低氣溫（華氏）                                                 |
| AVG_WIND_SPEED         | 每日平均風速（英里/小時）                                           |
| FIRE_START_DAY         | 是否於該日發生野火（布林值：True/False）                            |
| YEAR                   | 年份                                                                 |
| TEMP_RANGE             | 當日最高與最低溫差，反映氣溫變化程度                                |
| WIND_TEMP_RATIO        | 平均風速與最高溫度的比值，捕捉風與溫度間的動態關係                  |
| MONTH                  | 月份（1–12）                                                        |
| SEASON                 | 季節（Winter, Spring, Summer, Fall）                                |
| LAGGED_PRECIPITATION   | 前 7 天的累積降水量，反映近一週的濕潤條件                            |
| LAGGED_AVG_WIND_SPEED  | 前 7 天的平均風速，反映持續的風力狀況                                |
| DAY_OF_YEAR            | 當年度中的天數（1–365 或 366）                                     |


<h5>

- 對月份做 sin/cos 轉換  保留了數值間的連續性與週期性。
  
- 1 月與 12 月其實很接近，但 get_dummies() 會把它們視為完全無關。

- 而 sin/cos 轉換會保留這種「循環性」。

</h5>

In [3]:
import numpy as np

df['MONTH_SIN'] = np.sin(2 * np.pi * df['MONTH'] / 12)
df['MONTH_COS'] = np.cos(2 * np.pi * df['MONTH'] / 12)
df['DOY_SIN'] = np.sin(2 * np.pi * df['DAY_OF_YEAR'] / 366)
df['DOY_COS'] = np.cos(2 * np.pi * df['DAY_OF_YEAR'] / 366)

df = df.drop(columns=['DATE', 'MONTH', 'DAY_OF_YEAR'])

# 將 FIRE_START_DAY 轉為整數型（0 或 1）
df['FIRE_START_DAY'] = df['FIRE_START_DAY'].astype(int)

<h5>

- `PRECIPITATION`幾乎所有值都是 0（沒下雨），只有極少數是非零，所以我們轉成是否有下雨 

- `TEMP_MEAN` 溫度範圍（TEMP_RANGE）差異滿大的，從 2 到 41，直接用溫度平均值，有時比分開的 MAX、MIN 更有區分力。

</h5>

In [None]:
df['IS_RAINING'] = (df['PRECIPITATION'] > 0).astype(int) #幾乎所有值都是 0（沒下雨），只有極少數是非零。
df['TEMP_MEAN'] = (df['MAX_TEMP'] + df['MIN_TEMP'] / 2).astype(float) #溫度範圍（TEMP_RANGE）差異滿大的，從 2 到 41，直接用溫度平均值，有時比分開的 MAX、MIN 更有區分力。
df = df.drop(columns=['MAX_TEMP', 'MIN_TEMP','LAGGED_PRECIPITATION'])

In [5]:

df.describe()

Unnamed: 0,PRECIPITATION,AVG_WIND_SPEED,FIRE_START_DAY,YEAR,TEMP_RANGE,WIND_TEMP_RATIO,LAGGED_PRECIPITATION,LAGGED_AVG_WIND_SPEED,MONTH_SIN,MONTH_COS,DOY_SIN,DOY_COS,IS_RAINING,TEMP_MEAN
count,14987.0,14976.0,14988.0,14988.0,14987.0,14976.0,14988.0,14988.0,14988.0,14988.0,14988.0,14988.0,14988.0,14987.0
mean,0.032315,7.435098,0.331665,2004.016813,14.040902,0.107019,0.226188,7.434198,-0.004336929,-0.001308696,8.899901e-05,-0.001207,0.09214,98.782044
std,0.179544,2.129985,0.470827,11.843342,5.995327,0.03563,0.648705,1.387849,0.705653,0.7085902,0.7075638,0.706696,0.289234,9.773213
min,0.0,1.79,0.0,1984.0,2.0,0.023553,0.0,3.227143,-1.0,-1.0,-0.9999632,-1.0,0.0,68.5
25%,0.0,6.04,0.0,1994.0,10.0,0.085238,0.0,6.518571,-0.8660254,-0.8660254,-0.710135,-0.704066,0.0,91.5
50%,0.0,7.16,0.0,2004.0,12.0,0.102222,0.0,7.478571,-2.449294e-16,-1.83697e-16,1.224647e-16,-0.008583,0.0,99.0
75%,0.0,8.5,1.0,2014.0,17.0,0.120462,0.06,8.278571,0.5,0.8660254,0.710135,0.704066,0.0,105.5
max,4.53,26.17,1.0,2025.0,41.0,0.459123,8.18,13.932857,1.0,1.0,0.9999632,1.0,1.0,141.0


- 根據[NOAA](https://www.noaa.gov/noaa-wildfire)
- 和[Climate](https://www.climate.gov/news-features/event-tracker/weather-and-climate-influences-january-2025-fires-around-los-angeles)

- 1. 氣溫變異指數（Temperature Variation Index）  

| **項目**      | **內容**                                                                                   |
|---------------|--------------------------------------------------------------------------------------------|
| **定義**      | 每日氣溫的變異程度，反映當天最高和最低氣溫之間的差異。較大的溫差可能與氣候極端性相關，進一步加劇火災風險。 |
| **公式**      | `TEMP_VARIATION = MAX_TEMP - MIN_TEMP`                                                     |

- 2. 降水與風速比率（Precipitation-Wind Ratio）  

| **項目**      | **內容**                                                                                   |
|---------------|--------------------------------------------------------------------------------------------|
| **定義**      | 衡量降水量與風速之間的關聯。當降水量低且風速高時，通常意味著乾燥條件與強風並存，火災風險上升。            |
| **公式**      | `PRECIPITATION_WIND_RATIO = PRECIPITATION / AVG_WIND_SPEED`                                |

- 3. 季節性降水與風速關聯指數（Seasonal Precipitation-Wind Index）  

| **項目**      | **內容**                                                                                   |
|---------------|--------------------------------------------------------------------------------------------|
| **定義**      | 綜合考量季節（SEASON）對降水與風速影響的指標。不同季節降水和風速的組合，會對火災風險產生不同作用。          |
| **公式**      | `SEASONAL_PRECIP_WIND = (PRECIPITATION * (SEASON == 'Winter')) + (AVG_WIND_SPEED * (SEASON == 'Summer'))` |

- 4. 季節性乾燥指數（Seasonal Dryness Index）  

| **項目**      | **內容**                                                                                   |
|---------------|--------------------------------------------------------------------------------------------|
| **定義**      | 根據當季（秋季或冬季）的降水量與日溫差來評估乾燥程度。乾燥季節中的高乾燥值與火災風險高度相關。             |
| **公式**      | `SEASONAL_DRYNESS = (PRECIPITATION * (SEASON == 'Fall' or SEASON == 'Winter')) / (MAX_TEMP - MIN_TEMP)` |

- 5. 日中溫差與風速結合指數（Diurnal Temperature and Wind Speed Index） 

| **項目**      | **內容**                                                                                   |
|---------------|--------------------------------------------------------------------------------------------|
| **定義**      | 此指標將每日的氣溫差（即日間溫度變化）與風速結合，評估乾燥和高風速的條件下，火災風險的潛在性。            |
| **公式**      | `DIURNAL_TEMP_WIND = (MAX_TEMP - MIN_TEMP) * AVG_WIND_SPEED` |


In [None]:
#--------------------------------------
# 我分析而得的衍生指標 :
'''



df['PRECIPITATION_WIND_RATIO'] = df['PRECIPITATION'] / df['AVG_WIND_SPEED']
df['SEASONAL_PRECIP_WIND'] = (df['PRECIPITATION'] * (df['SEASON'] == 'Winter')) + (df['AVG_WIND_SPEED'] * (df['SEASON'] == 'Summer'))
df['SEASONAL_DRYNESS'] = (df['PRECIPITATION'] * ((df['SEASON'] == 'Fall') | (df['SEASON'] == 'Winter'))) / (df['MAX_TEMP'] - df['MIN_TEMP'])
df['DIURNAL_TEMP_WIND'] = (df['MAX_TEMP'] - df['MIN_TEMP']) * df['AVG_WIND_SPEED']

'''

# One-Hot Encoding: SEASON
df = pd.get_dummies(df, columns=['SEASON'])
season_cols = ['SEASON_Fall', 'SEASON_Spring', 'SEASON_Summer', 'SEASON_Winter']
df[season_cols] = df[season_cols].astype(int)

In [7]:
# 特徵與目標分離
X = df.drop(['FIRE_START_DAY'], axis=1)
y = df['FIRE_START_DAY']

2. 數值特徵標準化（Standardization）
為避免某些欄位（如溫度或風速）對模型訓練造成不公平的權重，我們可以對所有數值特徵做 **Z-score** 標準化 **（均值為0，標準差為1）**，但不包含 One-Hot 欄位。

In [36]:
'''
from sklearn.preprocessing import StandardScaler

# 找出所有數值欄位（排除 one-hot 和目標變數）
numeric_cols = X.select_dtypes(include=['float64', 'int64']).columns

# 建立標準化物件並套用於訓練集和測試集
scaler = StandardScaler()
X[numeric_cols] = scaler.fit_transform(X[numeric_cols])
'''

"\nfrom sklearn.preprocessing import StandardScaler\n\n# 找出所有數值欄位（排除 one-hot 和目標變數）\nnumeric_cols = X.select_dtypes(include=['float64', 'int64']).columns\n\n# 建立標準化物件並套用於訓練集和測試集\nscaler = StandardScaler()\nX[numeric_cols] = scaler.fit_transform(X[numeric_cols])\n"

In [8]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14988 entries, 0 to 14987
Data columns (total 17 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   PRECIPITATION          14987 non-null  float64
 1   AVG_WIND_SPEED         14976 non-null  float64
 2   YEAR                   14988 non-null  int64  
 3   TEMP_RANGE             14987 non-null  float64
 4   WIND_TEMP_RATIO        14976 non-null  float64
 5   LAGGED_PRECIPITATION   14988 non-null  float64
 6   LAGGED_AVG_WIND_SPEED  14988 non-null  float64
 7   MONTH_SIN              14988 non-null  float64
 8   MONTH_COS              14988 non-null  float64
 9   DOY_SIN                14988 non-null  float64
 10  DOY_COS                14988 non-null  float64
 11  IS_RAINING             14988 non-null  int64  
 12  TEMP_MEAN              14987 non-null  float64
 13  SEASON_Fall            14988 non-null  bool   
 14  SEASON_Spring          14988 non-null  bool   
 15  SE

In [10]:
print(X.isna().sum())
X = X.fillna(X.median())
assert X.isna().sum().sum() == 0, "There are still missing values in the dataset."

PRECIPITATION             1
AVG_WIND_SPEED           12
YEAR                      0
TEMP_RANGE                1
WIND_TEMP_RATIO          12
LAGGED_PRECIPITATION      0
LAGGED_AVG_WIND_SPEED     0
MONTH_SIN                 0
MONTH_COS                 0
DOY_SIN                   0
DOY_COS                   0
IS_RAINING                0
TEMP_MEAN                 1
SEASON_Fall               0
SEASON_Spring             0
SEASON_Summer             0
SEASON_Winter             0
dtype: int64


### 切割資料集

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.3)

In [12]:
print(y.value_counts(normalize=True))

FIRE_START_DAY
0    0.668335
1    0.331665
Name: proportion, dtype: float64


In [13]:
X_train.head(5) 

Unnamed: 0,PRECIPITATION,AVG_WIND_SPEED,YEAR,TEMP_RANGE,WIND_TEMP_RATIO,LAGGED_PRECIPITATION,LAGGED_AVG_WIND_SPEED,MONTH_SIN,MONTH_COS,DOY_SIN,DOY_COS,IS_RAINING,TEMP_MEAN,SEASON_Fall,SEASON_Spring,SEASON_Summer,SEASON_Winter
11798,0.0,7.61,2016,22.0,0.093951,0.0,8.277143,0.866025,-0.5,0.944489,-0.328542,0,110.5,False,True,False,False
4885,0.0,8.5,1997,11.0,0.114865,0.0,7.414286,0.5,-0.866025,0.710135,-0.704066,0,105.5,False,True,False,False
1572,0.0,8.5,1988,14.0,0.132812,0.68,10.674286,0.866025,-0.5,0.93871,-0.344707,0,89.0,False,True,False,False
12985,0.0,10.07,2019,9.0,0.139861,0.0,8.245714,-0.5,-0.866025,-0.320423,-0.947274,0,103.5,False,False,True,False
879,0.0,8.05,1986,11.0,0.11338,0.0,7.35,0.5,-0.866025,0.551102,-0.834438,0,101.0,False,True,False,False


In [14]:
import mlflow
from mlflow.tracking.client import MlflowClient
mlflow.set_experiment("CA_Weather_Fire")

<Experiment: artifact_location='file:///c:/Users/ygz08/Desktop/Git/localgit/MLOPs/Predictable_wildfire/mlruns/995565665349288736', creation_time=1746261086177, experiment_id='995565665349288736', last_update_time=1746261086177, lifecycle_stage='active', name='CA_Weather_Fire', tags={}>

<hr>

## LogisticRegression

In [16]:
from sklearn.linear_model import LogisticRegression

with mlflow.start_run(run_name='LogisticRegression'):#mlflow
    mlflow.tensorflow.autolog()#mlflow
    max_iter=1000 
    #mlflow.log_param("max_iter", max_iter) #mlflow紀錄參數n_estimators
# 方法四：使用 class_weight='balanced'
    log_reg = LogisticRegression(max_iter=max_iter, class_weight='balanced')
    log_reg.fit(X_train, y_train)
    run_id = mlflow.active_run().info.run_id#mlflow
    print(f"Model saved in run {run_id}")#mlflow
    

    # 檢查訓練與測試分數
    print("Train score:", log_reg.score(X_train, y_train))
    print("Test score:", log_reg.score(X_test, y_test))

    mlflow.log_metric("Train score", log_reg.score(X_train, y_train))#mlflow
    mlflow.log_metric("Test score", log_reg.score(X_test, y_test))#mlflow


    # 存檔模型mlflow
    model_name = "LogisticRegression-model"
    mlflow.sklearn.log_model(     #mlflow.sklearn.log_model() #紀錄sklearn模型
        sk_model=log_reg, 
        artifact_path="LogisticRegression-model",
        registered_model_name=model_name,  #
    )

    from sklearn.metrics import classification_report
    print(classification_report(y_test, log_reg.predict(X_test)))

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Model saved in run d3a7caf625864e4090a66b75d4c8f4bf
Train score: 0.751691926413116
Test score: 0.7511674449633089




              precision    recall  f1-score   support

           0       0.88      0.73      0.80      3047
           1       0.58      0.80      0.67      1450

    accuracy                           0.75      4497
   macro avg       0.73      0.76      0.74      4497
weighted avg       0.79      0.75      0.76      4497



Registered model 'LogisticRegression-model' already exists. Creating a new version of this model...
Created version '11' of model 'LogisticRegression-model'.


✅ 模型優點
- 對「有火災」的 recall 很高：0.80
    - 表示你抓到 80% 的火災案例，這對野火預測是關鍵（比 precision 更重要）。
- f1-score 有火災類別也達到 0.67，代表整體模型並不差。

⚠️ 模型限制
- precision 只有 0.58，也就是說：
    - 模型預測「有火災」的案例中，有 42% 是誤報（false positive）。


<hr>

## Random Forest

In [35]:
from sklearn.ensemble import RandomForestClassifier

with mlflow.start_run(run_name='RandomForest'):#mlflow
    mlflow.tensorflow.autolog()#mlflow
    #max_iter=1000 
    #mlflow.log_param("max_iter", max_iter) #mlflow紀錄參數n_estimators
# 方法四：使用 class_weight='balanced'
    #rf = RandomForestClassifier(random_state=42, class_weight='balanced')
    rf = RandomForestClassifier(n_estimators=150, max_depth=15, class_weight='balanced')
    rf.fit(X_train, y_train)
    run_id = mlflow.active_run().info.run_id#mlflow
    print(f"Model saved in run {run_id}")#mlflow
    

    # 檢查訓練與測試分數
    print("Train score:", rf.score(X_train, y_train))
    print("Test score:", rf.score(X_test, y_test))

    mlflow.log_metric("Train score", rf.score(X_train, y_train))#mlflow
    mlflow.log_metric("Test score", rf.score(X_test, y_test))#mlflow


    # 存檔模型mlflow
    model_name = "RandomForest-model"
    mlflow.sklearn.log_model(     #mlflow.sklearn.log_model() #紀錄sklearn模型
        sk_model=rf, 
        artifact_path="RandomForest-model",
        registered_model_name=model_name,  #
    )
    
    from sklearn.metrics import classification_report
    print(classification_report(y_test, rf.predict(X_test)))


Model saved in run 83f9e1397f6c4b18aaade5c87e2d8f07
Train score: 0.9454770755885997
Test score: 0.7894151656659996




              precision    recall  f1-score   support

           0       0.85      0.83      0.84      3047
           1       0.67      0.69      0.68      1450

    accuracy                           0.79      4497
   macro avg       0.76      0.76      0.76      4497
weighted avg       0.79      0.79      0.79      4497



Registered model 'RandomForest-model' already exists. Creating a new version of this model...
Created version '8' of model 'RandomForest-model'.


### 模型評估 :
- Before Tuning
  - **Train score: 1.0**
  - **Test score: 0.7883033133199912**
  - **accuracy  0.79**

✅ 模型優勢
- 對無火災類別（0）有較高的 precision 和 recall，尤其 recall 高達 0.88，模型能夠準確地識別大部分「無火災」的情況。
- f1-score 達到了 0.85，代表模型對「無火災」的預測表現非常好。

⚠️ 模型的挑戰
- 對有火災（1）類別，precision 是 0.70，而 recall 只有 0.60，這意味著：
    - 對有火災的預測還有進步空間，模型錯過了一部分火災案例，這對於防火系統來說是很關鍵的。

In [None]:
# Tuned Random Forest¶
from sklearn.model_selection import  RandomizedSearchCV
param_dist = {
    'n_estimators': [100, 200, 300, 400, 500],
    'max_depth': [None, 5, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

# Set up the search
random_search = RandomizedSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_distributions=param_dist,
    n_iter=30,
    cv=5,
    scoring='roc_auc',
    verbose=1,
    random_state=42,
    n_jobs=-1
)

# Fit search
random_search.fit(X_train, y_train)a

Fitting 5 folds for each of 30 candidates, totalling 150 fits


In [19]:
# Get the best model
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score
best_rf = random_search.best_estimator_

# Predict
y_pred_best_rf = best_rf.predict(X_test)
y_proba_best_rf = best_rf.predict_proba(X_test)[:, 1]

# Evaluation
print("--- Best Random Forest (Tuned) ---")
print("Classification Report:\n", classification_report(y_test, y_pred_best_rf))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_best_rf))
print("ROC AUC Score:", roc_auc_score(y_test, y_proba_best_rf))

--- Best Random Forest (Tuned) ---
Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.87      0.85      3047
           1       0.70      0.65      0.68      1450

    accuracy                           0.80      4497
   macro avg       0.77      0.76      0.76      4497
weighted avg       0.79      0.80      0.80      4497

Confusion Matrix:
 [[2644  403]
 [ 506  944]]
ROC AUC Score: 0.8581001097744532


### 結果分析：
- After Tuning
  - **accuracy  0.8**
  - **ROC AUC  0.858**

<hr>

## XG Boosting

In [25]:
from xgboost import XGBClassifier
#Train score: 0.9090649127823849
#Test score: 0.7820769401823437

with mlflow.start_run(run_name='XGBClassifier'):#mlflow
    mlflow.tensorflow.autolog()#mlflow
    max_iter=1000 
    #mlflow.log_param("max_iter", max_iter) #mlflow紀錄參數n_estimators
# 方法四：使用 class_weight='balanced'
    xgb = XGBClassifier(
        random_state=42, 
        class_weight='balanced',  # 對不平衡數據進行調整
        alpha=0.1,  # L1 正則化強度，通常設為較小的正值
        lambda_=1.0,  # L2 正則化強度，默認為1
        n_estimators=100,  # 樹的數量
        max_depth=6,  # 每棵樹的最大深度
        learning_rate=0.1  # 學習率
    )
    xgb.fit(X_train, y_train)
    run_id = mlflow.active_run().info.run_id#mlflow
    print(f"Model saved in run {run_id}")#mlflow
    

    # 檢查訓練與測試分數
    print("Train score:", xgb.score(X_train, y_train))
    print("Test score:", xgb.score(X_test, y_test))

    mlflow.log_metric("Train score", xgb.score(X_train, y_train))#mlflow
    mlflow.log_metric("Test score", xgb.score(X_test, y_test))#mlflow


    # 存檔模型mlflow
    model_name = "XGBClassifier-model"
    mlflow.sklearn.log_model(     #mlflow.sklearn.log_model() #紀錄sklearn模型
        sk_model=xgb, 
        artifact_path="XGBClassifier-model",
        registered_model_name=model_name,  #
    )

    from sklearn.metrics import classification_report
    print(classification_report(y_test, xgb.predict(X_test)))

Parameters: { "class_weight", "lambda_" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Model saved in run 9e9d679c113c40a18cab588059d55060
Train score: 0.8434848918120293
Test score: 0.7954191683344451




              precision    recall  f1-score   support

           0       0.84      0.87      0.85      3047
           1       0.70      0.64      0.67      1450

    accuracy                           0.80      4497
   macro avg       0.77      0.76      0.76      4497
weighted avg       0.79      0.80      0.79      4497



Registered model 'XGBClassifier-model' already exists. Creating a new version of this model...
Created version '6' of model 'XGBClassifier-model'.


### 模型評估：

- 精度 (Precision)：對於類別 0（負樣本），精度達到 0.84，對於類別 1（正樣本），精度為 0.70。這意味著模型在預測正樣本時可能有一些錯誤，可能會錯誤地標記一些負樣本為正樣本。

- 召回率 (Recall)：對於類別 0，召回率為 0.87，這是比較好的，說明模型能夠正確標記大部分的負樣本。然而，對於類別 1，召回率是 0.64，意味著有相當一部分的正樣本未被成功預測出來，這可能是過擬合的跡象或者模型對某些特徵不夠敏感。

- F1-score：在平衡精度和召回率後，類別 0 的 F1-score 是 0.85，而類別 1 的 F1-score 是 0.67，這表明模型對於類別 0 的預測表現要好於類別 1。

In [27]:
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV

In [28]:
# 2) Tuned XGBoost via RandomizedSearchCV
param_dist_xgb = {
    'alpha' : [0.0, 0.1, 0.5, 1.0, 2.0],  # L1 正則化強度，通常設為較小的正值
    'lambda_' : [0.0, 0.1, 0.5, 1.0, 2.0],  # L2 正則化強度，默認為1
    'n_estimators': [100, 200, 300, 400, 500],
    'max_depth': [3, 5, 7, 10, 15],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0],
    'gamma': [0, 0.1, 0.2, 0.5],
    'min_child_weight': [1, 3, 5]
}

xgb_search = RandomizedSearchCV(
    estimator=XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='logloss'),
    param_distributions=param_dist_xgb,
    n_iter=30,
    cv=5,
    scoring='roc_auc',
    verbose=1,
    random_state=42,
    n_jobs=-1
)

# Fit search
xgb_search.fit(X_train, y_train)

# Get the best model
best_xgb = xgb_search.best_estimator_

# Predict with best model
y_pred_best_xgb = best_xgb.predict(X_test)
y_proba_best_xgb = best_xgb.predict_proba(X_test)[:, 1]

Fitting 5 folds for each of 30 candidates, totalling 150 fits


Parameters: { "lambda_", "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [29]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score
print("--- Best XGBoost (Tuned) ---")
print("Best Parameters:", xgb_search.best_params_)
print("Classification Report:\n", classification_report(y_test, y_pred_best_xgb))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_best_xgb))
print("ROC AUC Score:", roc_auc_score(y_test, y_proba_best_xgb))

--- Best XGBoost (Tuned) ---
Best Parameters: {'subsample': 0.8, 'n_estimators': 500, 'min_child_weight': 1, 'max_depth': 7, 'learning_rate': 0.01, 'lambda_': 0.1, 'gamma': 0.1, 'colsample_bytree': 1.0, 'alpha': 0.5}
Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.87      0.85      3047
           1       0.70      0.65      0.67      1450

    accuracy                           0.80      4497
   macro avg       0.77      0.76      0.76      4497
weighted avg       0.79      0.80      0.79      4497

Confusion Matrix:
 [[2636  411]
 [ 510  940]]
ROC AUC Score: 0.8619060013806683


### 結果分析：
- 精度 (Precision)：

    - 類別 0（沒有野火發生） precision: 0.84，這表明大部分被預測為沒有野火的樣本確實是沒有野火。

    - 類別 1（有野火發生） precision: 0.70，這表示模型對於有野火的預測並不如對無野火的預測準確。

- 召回率 (Recall)：

    - 類別 0 recall: 0.87，這說明模型能夠有效地捕捉到大多數沒有野火的樣本。

    - 類別 1 recall: 0.65，這表示對有野火的預測仍然有改進的空間，可能是因為過擬合或特徵選擇不夠充分。

- F1 分數 (F1-Score)：

    - 類別 0 F1-Score: 0.85，這說明對類別 0 的預測效果良好。

    - 類別 1 F1-Score: 0.67，這顯示對類別 1 的預測仍然有提升的空間，可能需要更多針對該類別的特徵或優化。

- ROC AUC Score：0.8619，這是衡量模型區分能力的指標，值較高，說明模型對區分兩類有較好的能力。

<hr>

## DNN

In [31]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l1, l2, l1_l2
from tensorflow.keras import regularizers

In [32]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
classes = np.unique(y_train)
class_weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)
class_weight_dict = dict(zip(classes, class_weights))

# 查看結果
print("Class Weights:", class_weight_dict)

Class Weights: {np.int64(0): np.float64(0.7525824964131994), np.int64(1): np.float64(1.4897756319227493)}


In [34]:
with mlflow.start_run(run_name='DNN'):#mlflow
    mlflow.tensorflow.autolog()#mlflow    

    n_input = X_train.shape[1]

    model = Sequential()
    model.add(BatchNormalization())
    model.add(Dense(256, input_shape=(n_input,), activation='relu',
                    kernel_regularizer=regularizers.l2(0.001)))  # L2
    model.add(Dropout(0.3))  # 增加 Dropout
    model.add(Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dropout(0.3))
    model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dropout(0.3))
    model.add(Dense(1, activation='sigmoid'))


    # 模型optimizer 和 learning rate


    initial_lr = 0.001
    from tensorflow.keras.optimizers import schedules
    lr_schedule = schedules.ExponentialDecay(
        initial_learning_rate=initial_lr,
        decay_steps=100000,
        decay_rate=0.96,
        staircase=True)
    from tensorflow.keras.optimizers import Adam
    optimizer = Adam(learning_rate=lr_schedule)
    model.summary()

    mlflow.log_param("loss", 'bce') #mlflow
    model.compile(loss='bce', optimizer=optimizer, metrics=['acc', 'Recall', 'Precision'])

    # EarlyStopping: 根據 val_loss 停止訓練
    early_stop = EarlyStopping(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True)
    # ModelCheckpoint: 儲存最佳模型
    from tensorflow.keras.callbacks import ModelCheckpoint
    checkpoint = ModelCheckpoint('./models_temp/DNN_best_model.h5', monitor='val_loss', save_best_only=True, verbose=1)

    history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, batch_size=128, verbose=1, class_weight=class_weight_dict)

    train_loss, train_acc, train_recall, train_precision = model.evaluate(X_train, y_train, verbose=0)
    test_loss, test_acc, test_recall, test_precision = model.evaluate(X_test, y_test, verbose=0)
    mlflow.log_metric("Train score", train_acc)#mlflow
    mlflow.log_metric("Test score", test_acc)#mlflow
    #註冊模型
    run_id = mlflow.active_run().info.run_id#mlflow
    result = mlflow.register_model(
        model_uri=f"runs:/{run_id}/model",  # 你要用 mlflow.log_model 存的位置
        name="DNN-model"              # 註冊後的 model name
    )
    #confusion matrix
    from sklearn.metrics import classification_report
    y_pred = model.predict(X_test)
    y_pred_class = (y_pred > 0.5).astype(int) 
    print(classification_report(y_test, y_pred_class, digits=4))

    
    model.evaluate(X_test, y_test)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)




Epoch 1/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - Precision: 0.5738 - Recall: 0.7428 - acc: 0.7326 - loss: 1.0362



[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - Precision: 0.5740 - Recall: 0.7433 - acc: 0.7327 - loss: 1.0350 - val_Precision: 0.3224 - val_Recall: 1.0000 - val_acc: 0.3224 - val_loss: 14.8866
Epoch 2/100
[1m77/82[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 3ms/step - Precision: 0.6017 - Recall: 0.8134 - acc: 0.7526 - loss: 0.7531



[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - Precision: 0.6014 - Recall: 0.8129 - acc: 0.7526 - loss: 0.7511 - val_Precision: 0.3227 - val_Recall: 1.0000 - val_acc: 0.3231 - val_loss: 3.1040
Epoch 3/100
[1m79/82[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - Precision: 0.5989 - Recall: 0.7870 - acc: 0.7558 - loss: 0.6465



[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - Precision: 0.5993 - Recall: 0.7872 - acc: 0.7559 - loss: 0.6457 - val_Precision: 0.3253 - val_Recall: 1.0000 - val_acc: 0.3311 - val_loss: 1.9589
Epoch 4/100
[1m80/82[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - Precision: 0.5994 - Recall: 0.8117 - acc: 0.7549 - loss: 0.5864



[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - Precision: 0.5994 - Recall: 0.8115 - acc: 0.7549 - loss: 0.5862 - val_Precision: 0.4625 - val_Recall: 0.9186 - val_acc: 0.6295 - val_loss: 0.7851
Epoch 5/100
[1m72/82[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 4ms/step - Precision: 0.6146 - Recall: 0.7788 - acc: 0.7551 - loss: 0.5631



[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - Precision: 0.6138 - Recall: 0.7807 - acc: 0.7557 - loss: 0.5619 - val_Precision: 0.4985 - val_Recall: 0.8952 - val_acc: 0.6758 - val_loss: 0.7393
Epoch 6/100
[1m81/82[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - Precision: 0.6058 - Recall: 0.7998 - acc: 0.7634 - loss: 0.5297



[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - Precision: 0.6058 - Recall: 0.7999 - acc: 0.7634 - loss: 0.5297 - val_Precision: 0.5462 - val_Recall: 0.8517 - val_acc: 0.7240 - val_loss: 0.6011
Epoch 7/100
[1m78/82[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - Precision: 0.6011 - Recall: 0.7959 - acc: 0.7579 - loss: 0.5289



[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - Precision: 0.6015 - Recall: 0.7962 - acc: 0.7580 - loss: 0.5287 - val_Precision: 0.5856 - val_Recall: 0.8138 - val_acc: 0.7543 - val_loss: 0.5466
Epoch 8/100
[1m78/82[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - Precision: 0.6101 - Recall: 0.8052 - acc: 0.7626 - loss: 0.5182



[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - Precision: 0.6097 - Recall: 0.8049 - acc: 0.7623 - loss: 0.5184 - val_Precision: 0.5951 - val_Recall: 0.7814 - val_acc: 0.7581 - val_loss: 0.5117
Epoch 9/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - Precision: 0.6054 - Recall: 0.7920 - acc: 0.7558 - loss: 0.5237 - val_Precision: 0.5876 - val_Recall: 0.8048 - val_acc: 0.7549 - val_loss: 0.5310
Epoch 10/100
[1m68/82[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 3ms/step - Precision: 0.6077 - Recall: 0.7852 - acc: 0.7550 - loss: 0.5158



[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - Precision: 0.6080 - Recall: 0.7871 - acc: 0.7557 - loss: 0.5146 - val_Precision: 0.6047 - val_Recall: 0.7924 - val_acc: 0.7661 - val_loss: 0.5010
Epoch 11/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - Precision: 0.6096 - Recall: 0.8035 - acc: 0.7634 - loss: 0.5080 - val_Precision: 0.5883 - val_Recall: 0.8021 - val_acc: 0.7552 - val_loss: 0.5294
Epoch 12/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - Precision: 0.6173 - Recall: 0.8037 - acc: 0.7631 - loss: 0.5003 - val_Precision: 0.6060 - val_Recall: 0.7745 - val_acc: 0.7650 - val_loss: 0.5041
Epoch 13/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - Precision: 0.6154 - Recall: 0.7994 - acc: 0.7635 - loss: 0.5052 - val_Precision: 0.6131 - val_Recall: 0.7607 - val_acc: 0.7681 - val_loss: 0.5025
Epoch 14/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m



[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - Precision: 0.6039 - Recall: 0.8092 - acc: 0.7577 - loss: 0.4986 - val_Precision: 0.6082 - val_Recall: 0.7848 - val_acc: 0.7676 - val_loss: 0.4971
Epoch 28/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - Precision: 0.6198 - Recall: 0.8086 - acc: 0.7702 - loss: 0.4899 - val_Precision: 0.5716 - val_Recall: 0.8310 - val_acc: 0.7447 - val_loss: 0.5153
Epoch 29/100
[1m68/82[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 3ms/step - Precision: 0.6141 - Recall: 0.7929 - acc: 0.7727 - loss: 0.4791



[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - Precision: 0.6136 - Recall: 0.7940 - acc: 0.7707 - loss: 0.4825 - val_Precision: 0.6056 - val_Recall: 0.7910 - val_acc: 0.7665 - val_loss: 0.4917
Epoch 30/100
[1m77/82[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 3ms/step - Precision: 0.6101 - Recall: 0.7920 - acc: 0.7565 - loss: 0.5080



[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - Precision: 0.6101 - Recall: 0.7927 - acc: 0.7568 - loss: 0.5074 - val_Precision: 0.6133 - val_Recall: 0.7690 - val_acc: 0.7692 - val_loss: 0.4876
Epoch 31/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - Precision: 0.6111 - Recall: 0.7766 - acc: 0.7568 - loss: 0.5096 - val_Precision: 0.6042 - val_Recall: 0.7876 - val_acc: 0.7652 - val_loss: 0.4987
Epoch 32/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - Precision: 0.6192 - Recall: 0.8031 - acc: 0.7641 - loss: 0.4987 - val_Precision: 0.6105 - val_Recall: 0.7697 - val_acc: 0.7674 - val_loss: 0.4890
Epoch 33/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - Precision: 0.6136 - Recall: 0.7811 - acc: 0.7671 - loss: 0.5006 - val_Precision: 0.5873 - val_Recall: 0.8028 - val_acc: 0.7545 - val_loss: 0.5019
Epoch 34/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 



[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - Precision: 0.6208 - Recall: 0.8042 - acc: 0.7690 - loss: 0.4876 - val_Precision: 0.6077 - val_Recall: 0.7745 - val_acc: 0.7661 - val_loss: 0.4815
Epoch 40/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - Precision: 0.6142 - Recall: 0.8128 - acc: 0.7639 - loss: 0.4925 - val_Precision: 0.5884 - val_Recall: 0.8055 - val_acc: 0.7556 - val_loss: 0.5094
Epoch 41/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - Precision: 0.6139 - Recall: 0.8123 - acc: 0.7681 - loss: 0.4889 - val_Precision: 0.6044 - val_Recall: 0.7903 - val_acc: 0.7656 - val_loss: 0.4931
Epoch 42/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - Precision: 0.6128 - Recall: 0.7946 - acc: 0.7630 - loss: 0.4994 - val_Precision: 0.5901 - val_Recall: 0.8014 - val_acc: 0.7565 - val_loss: 0.5067
Epoch 43/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m



[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - Precision: 0.6186 - Recall: 0.8019 - acc: 0.7640 - loss: 0.4983 - val_Precision: 0.6250 - val_Recall: 0.7586 - val_acc: 0.7754 - val_loss: 0.4800
Epoch 61/100
[1m80/82[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 3ms/step - Precision: 0.6111 - Recall: 0.7850 - acc: 0.7623 - loss: 0.4948



[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - Precision: 0.6112 - Recall: 0.7855 - acc: 0.7624 - loss: 0.4948 - val_Precision: 0.6262 - val_Recall: 0.7614 - val_acc: 0.7765 - val_loss: 0.4799
Epoch 62/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - Precision: 0.6197 - Recall: 0.7978 - acc: 0.7706 - loss: 0.4855 - val_Precision: 0.5741 - val_Recall: 0.8393 - val_acc: 0.7474 - val_loss: 0.5177
Epoch 63/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - Precision: 0.6102 - Recall: 0.8266 - acc: 0.7659 - loss: 0.4876 - val_Precision: 0.6111 - val_Recall: 0.7890 - val_acc: 0.7701 - val_loss: 0.4893
Epoch 64/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - Precision: 0.6164 - Recall: 0.8102 - acc: 0.7654 - loss: 0.4907 - val_Precision: 0.6048 - val_Recall: 0.7883 - val_acc: 0.7656 - val_loss: 0.4917
Epoch 65/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

Registered model 'DNN-model' already exists. Creating a new version of this model...
Created version '8' of model 'DNN-model'.


[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 946us/step
              precision    recall  f1-score   support

           0     0.8778    0.7732    0.8222      3047
           1     0.6189    0.7738    0.6877      1450

    accuracy                         0.7734      4497
   macro avg     0.7483    0.7735    0.7550      4497
weighted avg     0.7943    0.7734    0.7788      4497

[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 985us/step - Precision: 0.6184 - Recall: 0.7472 - acc: 0.7679 - loss: 0.4915


### 結果分析：
- Recall 提升了不少

    - Recall（靈敏度）高達 ~82%，這對於偵測火災（正類）是好事，表示較少漏判火災發生。

- Precision 稍低，但在接受範圍內

    - Precision 約 57%，代表有一些假陽性，但 Recall 更重要時（例如防災應用），這是可以接受的 trade-off。

- class_weight 有明顯效果




<hr>

## LSTM

In [55]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [None]:
#準備序列數據 (Prepare sequences)
df_seq = df.sort_values(by=['YEAR', 'MONTH']).reset_index(drop=True)
X_all = df_seq.drop(columns=['YEAR', 'MONTH', 'FIRE_START_DAY'])
X_all = X_all.fillna(X.median())
assert X_all.isna().sum().sum() == 0, "There are still missing values in the dataset."
y_all = df_seq['FIRE_START_DAY']
def create_sequences(X, y, time_steps=12): #時間步長為12
    Xs, ys = [], []
    for i in range(len(X) - time_steps): #(11980, 12, 17) 
        Xs.append(X[i:(i + time_steps)])#(11980, 12, 17)
        ys.append(y[i + time_steps])#(11980,)
    #X[i:(i + time_steps)] 會取出從 i 到 i+time_steps 的資料，這樣就能夠形成一個時間序列的樣本。
    return np.array(Xs), np.array(ys)

#對於每個樣本，Xs 會包含過去12個時刻的特徵，而 ys 會包含12個時間步長後的目標變數。
time_steps  = 12
X_seq, y_seq = create_sequences(X_all, y_all, time_steps=time_steps)

#訓練/測試集切分 (Train/test split)
X_train, X_test, y_train, y_test = train_test_split(
    X_seq, y_seq,
    test_size=0.2,
    random_state=42,
    stratify=y_seq
)

In [85]:
X_all.describe() #檢查資料集的統計資訊

Unnamed: 0,PRECIPITATION,MAX_TEMP,MIN_TEMP,AVG_WIND_SPEED,TEMP_RANGE,WIND_TEMP_RATIO,LAGGED_PRECIPITATION,LAGGED_AVG_WIND_SPEED,TEMP_VARIATION,PRECIPITATION_WIND_RATIO,SEASONAL_PRECIP_WIND,SEASONAL_DRYNESS,DIURNAL_TEMP_WIND
count,14988.0,14988.0,14988.0,14988.0,14988.0,14988.0,14988.0,14988.0,14988.0,14988.0,14988.0,14988.0,14988.0
mean,0.032313,70.534961,56.494129,7.434878,14.040766,0.107016,0.226188,7.434198,14.040766,0.003538,2.022262,0.003404,100.008452
std,0.179538,7.263206,6.767461,2.129146,5.99515,0.035616,0.648705,1.387849,5.99515,0.018879,3.49589,0.027388,40.484232
min,0.0,50.0,33.0,1.79,2.0,0.023553,0.0,3.227143,2.0,0.0,0.0,0.0,7.16
25%,0.0,65.0,51.0,6.04,10.0,0.085395,0.0,6.518571,10.0,0.0,0.0,0.0,72.45
50%,0.0,70.0,57.0,7.16,12.0,0.102222,0.0,7.478571,12.0,0.0,0.0,0.0,93.12
75%,0.0,75.0,62.0,8.5,17.0,0.120462,0.06,8.278571,17.0,0.0,5.37,0.0,120.75
max,4.53,106.0,77.0,26.17,41.0,0.459123,8.18,13.932857,41.0,0.405188,14.76,1.465,405.9


In [None]:
import numpy as np
import mlflow
import mlflow.tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.utils.class_weight import compute_class_weight

# ⚠️ 確保 y 是 numpy array 且為 float32
y_train = y_train.astype(np.float32)
y_test = y_test.astype(np.float32)
X_test = X_test.astype(np.float32)
X_train = X_train.astype(np.float32)

# ⚖️ 計算 class_weight
classes = np.unique(y_train)
class_weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)
class_weight_dict = dict(zip(classes, class_weights))

# 🎯 模型訓練
with mlflow.start_run(run_name='LSTM'):
    mlflow.tensorflow.autolog()

    model = Sequential([
        LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True,
             kernel_regularizer=l2(0.001)),
        Dropout(0.3),
        LSTM(32, kernel_regularizer=l2(0.001)),
        Dropout(0.3),
        Dense(1, activation='sigmoid', kernel_regularizer=l2(0.001))
    ])
    model.summary()

    model.compile(loss='bce', optimizer=Adam(learning_rate=0.0005), metrics=['acc', 'Recall', 'Precision'])

    early_stop = EarlyStopping(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True)
    checkpoint = ModelCheckpoint('./models_temp/LSTM_best_model.h5', monitor='val_loss', save_best_only=True, verbose=1)

    history = model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=100,
        batch_size=128,
        callbacks=[early_stop, checkpoint],
        class_weight=class_weight_dict,
        verbose=1
    )

    # 評估
    train_loss, train_acc, train_recall, train_precision = model.evaluate(X_train, y_train, verbose=0)
    test_loss, test_acc, test_recall, test_precision = model.evaluate(X_test, y_test, verbose=0)
    mlflow.log_metric("Train score", train_acc)#mlflow
    mlflow.log_metric("Test score", test_acc)#mlflow
    #註冊模型
    run_id = mlflow.active_run().info.run_id#mlflow
    result = mlflow.register_model(
        model_uri=f"runs:/{run_id}/model",  # 你要用 mlflow.log_model 存的位置
        name="LSTM-model"              # 註冊後的 model name
    )


  super().__init__(**kwargs)


Epoch 1/100
[1m91/94[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - Precision: 0.4661 - Recall: 0.5336 - acc: 0.6514 - loss: 0.7525
Epoch 1: val_loss improved from inf to 0.66075, saving model to ./models_temp/LSTM_best_model.h5




[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - Precision: 0.4693 - Recall: 0.5382 - acc: 0.6535 - loss: 0.7502 - val_Precision: 0.5811 - val_Recall: 0.7173 - val_acc: 0.7346 - val_loss: 0.6608
Epoch 2/100
[1m90/94[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 9ms/step - Precision: 0.6089 - Recall: 0.7130 - acc: 0.7545 - loss: 0.6122
Epoch 2: val_loss improved from 0.66075 to 0.56166, saving model to ./models_temp/LSTM_best_model.h5




[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - Precision: 0.6088 - Recall: 0.7128 - acc: 0.7543 - loss: 0.6121 - val_Precision: 0.6464 - val_Recall: 0.6217 - val_acc: 0.7617 - val_loss: 0.5617
Epoch 3/100
[1m93/94[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - Precision: 0.6083 - Recall: 0.7254 - acc: 0.7461 - loss: 0.5946
Epoch 3: val_loss did not improve from 0.56166
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - Precision: 0.6082 - Recall: 0.7255 - acc: 0.7462 - loss: 0.5943 - val_Precision: 0.5896 - val_Recall: 0.7515 - val_acc: 0.7440 - val_loss: 0.5758
Epoch 4/100
[1m88/94[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 8ms/step - Precision: 0.5851 - Recall: 0.7378 - acc: 0.7394 - loss: 0.5686
Epoch 4: val_loss did not improve from 0.56166
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - Precision: 0.5856 - Recall: 0.7379 - acc: 0.7397 - loss: 0.5683 - val_Precision: 0.547



[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - Precision: 0.5844 - Recall: 0.7659 - acc: 0.7411 - loss: 0.5564 - val_Precision: 0.6031 - val_Recall: 0.7324 - val_acc: 0.7513 - val_loss: 0.5443
Epoch 6/100
[1m91/94[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - Precision: 0.5845 - Recall: 0.7535 - acc: 0.7464 - loss: 0.5528
Epoch 6: val_loss did not improve from 0.54433
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - Precision: 0.5849 - Recall: 0.7535 - acc: 0.7464 - loss: 0.5528 - val_Precision: 0.5587 - val_Recall: 0.8139 - val_acc: 0.7250 - val_loss: 0.5803
Epoch 7/100
[1m92/94[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - Precision: 0.5865 - Recall: 0.7763 - acc: 0.7404 - loss: 0.5474
Epoch 7: val_loss improved from 0.54433 to 0.51860, saving model to ./models_temp/LSTM_best_model.h5




[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - Precision: 0.5867 - Recall: 0.7760 - acc: 0.7407 - loss: 0.5473 - val_Precision: 0.6391 - val_Recall: 0.6610 - val_acc: 0.7637 - val_loss: 0.5186
Epoch 8/100
[1m87/94[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 7ms/step - Precision: 0.5940 - Recall: 0.7617 - acc: 0.7445 - loss: 0.5398
Epoch 8: val_loss did not improve from 0.51860
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - Precision: 0.5938 - Recall: 0.7614 - acc: 0.7447 - loss: 0.5401 - val_Precision: 0.5403 - val_Recall: 0.8219 - val_acc: 0.7089 - val_loss: 0.5866
Epoch 9/100
[1m91/94[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - Precision: 0.5879 - Recall: 0.7574 - acc: 0.7402 - loss: 0.5462
Epoch 9: val_loss did not improve from 0.51860
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - Precision: 0.5880 - Recall: 0.7574 - acc: 0.7404 - loss: 0.5460 - val_Precision: 0.60

Registered model 'LSTM-model' already exists. Creating a new version of this model...
Created version '8' of model 'LSTM-model'.
Registered model 'LSTM-model' already exists. Creating a new version of this model...
Created version '9' of model 'LSTM-model'.


<hr>

## Attention mechanism

In [89]:
X = df.drop(['FIRE_START_DAY'], axis=1)
y = df['FIRE_START_DAY']
X = X.fillna(X.median())
assert X.isna().sum().sum() == 0, "There are still missing values in the dataset."
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.3)

In [90]:
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization, MultiHeadAttention, Flatten, GlobalAveragePooling1D, Reshape
from tensorflow.keras.models import Model

#
with mlflow.start_run(run_name='AttentionMechanisim'):#mlflow
    mlflow.tensorflow.autolog()#mlflow    
    n_input = X_train.shape[1]
    # Model
    # 定義模型
    input_layer = Input(shape=(n_input,))
    x = BatchNormalization()(input_layer)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.1)(x)
    # Reshape 輸入，以便進行注意力機制（將它變成三維張量）
    x = Reshape((1, 128))(x)  # 假設每個樣本有 128 個特徵，這樣就會有 1 個時間步
    # 添加多頭注意力層
    x_attention = MultiHeadAttention(num_heads=4, key_dim=32)(x, x)  # query, key 和 value 都是 x
    x_attention = Dropout(0.1)(x_attention)
    # 將注意力層的輸出展平
    x_flattened = Flatten()(x_attention)
    # 經過展平後的處理
    x = Dense(128, activation='relu')(x_flattened)
    x = Dropout(0.1)(x)
    output = Dense(1, activation='sigmoid')(x)


    # Model
    model = Model(inputs=input_layer, outputs=output)
    model.summary()
    # 模型optimizer 和 learning rate
    initial_lr = 0.001
    from tensorflow.keras.optimizers import schedules
    lr_schedule = schedules.ExponentialDecay(
        initial_learning_rate=initial_lr,
        decay_steps=100000,
        decay_rate=0.96,
        staircase=True)
    from tensorflow.keras.optimizers import Adam
    optimizer = Adam(learning_rate=lr_schedule)

    model.compile(loss='bce', optimizer=optimizer, metrics=['acc', 'Recall', 'Precision'])
    # EarlyStopping: 根據 val_loss 停止訓練
    early_stop = EarlyStopping(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True)
    # ModelCheckpoint: 儲存最佳模型
    checkpoint = ModelCheckpoint('./models_temp/attention_best_model.h5', monitor='val_loss', save_best_only=True, verbose=1)
    history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, batch_size=128, verbose=1, callbacks=[early_stop, checkpoint], class_weight=class_weight_dict)

    train_loss, train_acc, train_recall, train_precision = model.evaluate(X_train, y_train, verbose=0)
    test_loss, test_acc, test_recall, test_precision = model.evaluate(X_test, y_test, verbose=0)
    mlflow.log_metric("Train score", train_acc)#mlflow
    mlflow.log_metric("Test score", test_acc)#mlflow
    #註冊模型
    run_id = mlflow.active_run().info.run_id#mlflow
    result = mlflow.register_model(
        model_uri=f"runs:/{run_id}/model",  # 你要用 mlflow.log_model 存的位置
        name="attention-model"              # 註冊後的 model name
    )

    # 評估模型
    model.evaluate(X_test, y_test)



Epoch 1/100
[1m81/82[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - Precision: 0.5878 - Recall: 0.7180 - acc: 0.7401 - loss: 0.5509
Epoch 1: val_loss improved from inf to 20.11911, saving model to ./models_temp/attention_best_model.h5




[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - Precision: 0.5879 - Recall: 0.7194 - acc: 0.7402 - loss: 0.5502 - val_Precision: 0.3224 - val_Recall: 1.0000 - val_acc: 0.3224 - val_loss: 20.1191
Epoch 2/100
[1m79/82[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - Precision: 0.5903 - Recall: 0.8170 - acc: 0.7449 - loss: 0.5033
Epoch 2: val_loss improved from 20.11911 to 1.25602, saving model to ./models_temp/attention_best_model.h5




[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - Precision: 0.5904 - Recall: 0.8160 - acc: 0.7450 - loss: 0.5034 - val_Precision: 0.3310 - val_Recall: 0.9993 - val_acc: 0.3485 - val_loss: 1.2560
Epoch 3/100
[1m78/82[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - Precision: 0.5865 - Recall: 0.8013 - acc: 0.7450 - loss: 0.5009
Epoch 3: val_loss improved from 1.25602 to 0.60706, saving model to ./models_temp/attention_best_model.h5




[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - Precision: 0.5865 - Recall: 0.8014 - acc: 0.7449 - loss: 0.5011 - val_Precision: 0.4992 - val_Recall: 0.8414 - val_acc: 0.6767 - val_loss: 0.6071
Epoch 4/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - Precision: 0.6047 - Recall: 0.7908 - acc: 0.7528 - loss: 0.5101
Epoch 4: val_loss did not improve from 0.60706
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - Precision: 0.6046 - Recall: 0.7909 - acc: 0.7527 - loss: 0.5100 - val_Precision: 0.4656 - val_Recall: 0.8862 - val_acc: 0.6353 - val_loss: 0.6598
Epoch 5/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - Precision: 0.5914 - Recall: 0.7955 - acc: 0.7494 - loss: 0.5006
Epoch 5: val_loss did not improve from 0.60706
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - Precision: 0.5915 - Recall: 0.7956 - acc: 0.7494 - loss: 0.5006 - val_Precision: 0.5020



[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - Precision: 0.5892 - Recall: 0.7988 - acc: 0.7484 - loss: 0.4955 - val_Precision: 0.5290 - val_Recall: 0.8483 - val_acc: 0.7076 - val_loss: 0.5635
Epoch 7/100
[1m75/82[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 7ms/step - Precision: 0.5857 - Recall: 0.7915 - acc: 0.7482 - loss: 0.4984
Epoch 7: val_loss improved from 0.56355 to 0.51582, saving model to ./models_temp/attention_best_model.h5




[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - Precision: 0.5864 - Recall: 0.7924 - acc: 0.7483 - loss: 0.4984 - val_Precision: 0.5852 - val_Recall: 0.7745 - val_acc: 0.7503 - val_loss: 0.5158
Epoch 8/100
[1m76/82[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 6ms/step - Precision: 0.6073 - Recall: 0.7819 - acc: 0.7617 - loss: 0.4912
Epoch 8: val_loss improved from 0.51582 to 0.49418, saving model to ./models_temp/attention_best_model.h5




[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - Precision: 0.6065 - Recall: 0.7830 - acc: 0.7609 - loss: 0.4913 - val_Precision: 0.5905 - val_Recall: 0.7717 - val_acc: 0.7538 - val_loss: 0.4942
Epoch 9/100
[1m80/82[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - Precision: 0.6007 - Recall: 0.7897 - acc: 0.7516 - loss: 0.4896
Epoch 9: val_loss did not improve from 0.49418
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - Precision: 0.6007 - Recall: 0.7898 - acc: 0.7517 - loss: 0.4898 - val_Precision: 0.5671 - val_Recall: 0.8103 - val_acc: 0.7394 - val_loss: 0.5253
Epoch 10/100
[1m81/82[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - Precision: 0.6054 - Recall: 0.8079 - acc: 0.7611 - loss: 0.4860
Epoch 10: val_loss did not improve from 0.49418
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - Precision: 0.6052 - Recall: 0.8079 - acc: 0.7610 - loss: 0.4862 - val_Precision: 0.56



[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - Precision: 0.5985 - Recall: 0.8124 - acc: 0.7509 - loss: 0.4893 - val_Precision: 0.5912 - val_Recall: 0.7710 - val_acc: 0.7543 - val_loss: 0.4934
Epoch 14/100
[1m81/82[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - Precision: 0.5989 - Recall: 0.7974 - acc: 0.7530 - loss: 0.4937
Epoch 14: val_loss did not improve from 0.49343
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - Precision: 0.5989 - Recall: 0.7975 - acc: 0.7531 - loss: 0.4935 - val_Precision: 0.5890 - val_Recall: 0.8014 - val_acc: 0.7556 - val_loss: 0.5179
Epoch 15/100
[1m79/82[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - Precision: 0.5914 - Recall: 0.8021 - acc: 0.7514 - loss: 0.4946
Epoch 15: val_loss did not improve from 0.49343
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - Precision: 0.5920 - Recall: 0.8020 - acc: 0.7516 - loss: 0.4945 - val_Precision: 0.



[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - Precision: 0.6230 - Recall: 0.8146 - acc: 0.7650 - loss: 0.4808 - val_Precision: 0.5952 - val_Recall: 0.7634 - val_acc: 0.7563 - val_loss: 0.4921
Epoch 19/100
[1m79/82[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - Precision: 0.6096 - Recall: 0.7879 - acc: 0.7618 - loss: 0.4763
Epoch 19: val_loss did not improve from 0.49209
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - Precision: 0.6094 - Recall: 0.7883 - acc: 0.7615 - loss: 0.4767 - val_Precision: 0.5707 - val_Recall: 0.8103 - val_acc: 0.7423 - val_loss: 0.5130
Epoch 20/100
[1m78/82[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - Precision: 0.5966 - Recall: 0.8057 - acc: 0.7516 - loss: 0.4855
Epoch 20: val_loss did not improve from 0.49209
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - Precision: 0.5970 - Recall: 0.8055 - acc: 0.7520 - loss: 0.4853 - val_Precision: 0.



[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - Precision: 0.6055 - Recall: 0.8105 - acc: 0.7575 - loss: 0.4934 - val_Precision: 0.6049 - val_Recall: 0.7717 - val_acc: 0.7638 - val_loss: 0.4788
Epoch 23/100
[1m74/82[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 6ms/step - Precision: 0.6101 - Recall: 0.8044 - acc: 0.7616 - loss: 0.4810
Epoch 23: val_loss did not improve from 0.47882
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - Precision: 0.6100 - Recall: 0.8048 - acc: 0.7617 - loss: 0.4810 - val_Precision: 0.5659 - val_Recall: 0.8324 - val_acc: 0.7400 - val_loss: 0.5129
Epoch 24/100
[1m81/82[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - Precision: 0.6060 - Recall: 0.8276 - acc: 0.7641 - loss: 0.4699
Epoch 24: val_loss did not improve from 0.47882
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - Precision: 0.6059 - Recall: 0.8273 - acc: 0.7639 - loss: 0.4702 - val_Precision: 0.



[1m  1/141[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 18ms/step - Precision: 0.3077 - Recall: 0.5714 - acc: 0.6250 - loss: 0.7323

Registered model 'attention-model' already exists. Creating a new version of this model...
Created version '6' of model 'attention-model'.


[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - Precision: 0.6040 - Recall: 0.7525 - acc: 0.7591 - loss: 0.4873
