In [1]:
!pip install autokeras

Collecting autokeras
  Downloading autokeras-1.1.0-py3-none-any.whl (148 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m148.6/148.6 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Collecting keras-tuner>=1.1.0 (from autokeras)
  Downloading keras_tuner-1.4.6-py3-none-any.whl (128 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m128.9/128.9 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting keras-nlp>=0.4.0 (from autokeras)
  Downloading keras_nlp-0.6.4-py3-none-any.whl (584 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m584.8/584.8 kB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
Collecting keras-core (from keras-nlp>=0.4.0->autokeras)
  Downloading keras_core-0.1.7-py3-none-any.whl (950 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m950.8/950.8 kB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
Collecting tensorflow-text (from keras-nlp>=0.4.0->autokeras)
  Downloading tensorflow_text-2.15.0-cp310-cp

In [2]:
import pandas as pd
import tensorflow as tf

import autokeras as ak

Using TensorFlow backend


To keep this tutorial easy to follow, we use the UCI Air Quality dataset and try to
forecast the AH (absolute humidity) value at future timesteps. The raw file needs some
cleanup, so the data-loading cell below also performs some basic preprocessing.

## A Simple Example
The first step is to prepare your data. Here we use the [UCI Airquality
dataset](https://archive.ics.uci.edu/ml/datasets/Air+Quality) as an example.


In [None]:
# Columns used as model inputs. Defined once so the train, validation and
# test frames are guaranteed to share the same features in the same order.
FEATURE_COLUMNS = [
    "CO(GT)",
    "PT08.S1(CO)",
    "NMHC(GT)",
    "C6H6(GT)",
    "PT08.S2(NMHC)",
    "NOx(GT)",
    "PT08.S3(NOx)",
    "NO2(GT)",
    "PT08.S4(NO2)",
    "PT08.S5(O3)",
    "T",
    "RH",
]
# Column the forecaster is trained to predict.
TARGET_COLUMN = "AH"

# Load the ';'-separated file, then clean it:
#   1. drop the last two columns (presumably empty trailing columns -- confirm),
#   2. drop rows containing missing values,
#   3. replace ',' with '.' in string cells so astype("float64") can parse them.
dataset = pd.read_csv('AirQualityUCI.csv', sep=";")
dataset = (
    dataset[dataset.columns[:-2]]
    .dropna()
    .replace(",", ".", regex=True)
)

# Positional 70/30 split with no shuffling: the first 70% of rows are used
# for training and the remaining rows for validation.
val_split = int(len(dataset) * 0.7)
data_train = dataset[:val_split]
validation_data = dataset[val_split:]

data_x = data_train[FEATURE_COLUMNS].astype("float64")
data_x_val = validation_data[FEATURE_COLUMNS].astype("float64")
# Data with train data and the unseen data from subsequent time steps.
data_x_test = dataset[FEATURE_COLUMNS].astype("float64")

data_y = data_train[TARGET_COLUMN].astype("float64")
data_y_val = validation_data[TARGET_COLUMN].astype("float64")

print(data_x.shape)  # (6549, 12)
print(data_y.shape)  # (6549,)

(6549, 12)
(6549,)


The second step is to run the [TimeseriesForecaster](/time_series_forecaster).
As a quick demo, we set epochs to 30 and max_trials to 10.
You can also leave the epochs unspecified for an adaptive number of epochs.


In [None]:
# Forecast-window settings handed to the forecaster.
lookback = 10
predict_from = 1
predict_until = 1  # number of predicted items in future

# Search over at most 10 candidate models, ranked by validation loss.
clf = ak.TimeseriesForecaster(
    lookback=lookback,
    predict_from=predict_from,
    predict_until=predict_until,
    max_trials=10,
    objective="val_loss",
)

# Train the TimeseriesForecaster on the training split, validating each
# trial against the held-out split.
validation_pair = (data_x_val, data_y_val)
clf.fit(
    x=data_x,
    y=data_y,
    validation_data=validation_pair,
    batch_size=32,
    epochs=30,
)

# Predict with the best model (the input includes the original training data).
predictions = clf.predict(data_x_test)
print(predictions.shape)

# Evaluate the best model on the validation split.
validation_metrics = clf.evaluate(data_x_val, data_y_val)
print(validation_metrics)

Trial 6 Complete [00h 03m 55s]
val_loss: 2.174639940261841

Best val_loss So Far: 2.174639940261841
Total elapsed time: 00h 45m 16s

Search: Running Trial #7

Value             |Best Value So Far |Hyperparameter
True              |True              |timeseries_block_1/rnn_block_1/bidirectional
lstm              |gru               |timeseries_block_1/rnn_block_1/layer_type
2                 |2                 |timeseries_block_1/rnn_block_1/num_layers
0                 |0                 |regression_head_1/dropout
adam              |adam              |optimizer
0.01              |0.01              |learning_rate

Epoch 1/30


In [None]:
# Display the forecasts returned by clf.predict(data_x_test) in the training cell.
predictions