In [1]:
# --- Google Cloud project and region ---
project_id = 'pytorch-tpu-nfs'
location = 'us-central1'

# --- BigQuery destination for the weather time series ---
dataset_id = 'view_dataset'
table_id = 'weather_time_series'

# --- Experiment tracking ---
staging_bucket = 'gs://automl-samples'
experiment_id = 'weather-exp'

# --- Model input: number of time steps fed to the model per window ---
context_window = 24

In [14]:
import os
import datetime
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from google.cloud import bigquery
from google.cloud.exceptions import NotFound

import tempfile
import argparse
import sys
import os

from view_demo.utils import get_project_id
# Read the raw Jena climate CSV straight from Cloud Storage.
csv_path = 'gs://bench-datasets/jena_climate_2009_2016.csv'
df = pd.read_csv(csv_path)

# Convert to hourly dataset
# slice [start:stop:step], starting from index 5 take every 6th record.
# .copy() makes the subsample an independent frame, so the clean-up below
# writes into `df` itself rather than into a view/copy of the raw data.
df = df[5::6].copy()

# Clean Data: wind readings equal to -9999.0 are treated as bad and zeroed.
# Assign through df.loc[mask, column] instead of mutating a Series pulled
# out of the frame: the latter triggers SettingWithCopyWarning and, with
# pandas copy-on-write enabled, leaves the DataFrame unchanged.
bad_wv = df['wv (m/s)'] == -9999.0
df.loc[bad_wv, 'wv (m/s)'] = 0.0

bad_max_wv = df['max. wv (m/s)'] == -9999.0
df.loc[bad_max_wv, 'max. wv (m/s)'] = 0.0

# Verify the edits really landed in the DataFrame.
assert not (df['wv (m/s)'] == -9999.0).any()
assert not (df['max. wv (m/s)'] == -9999.0).any()


# Rename columns so that they comply with BigQuery column naming.
# Mapping: original column name -> BigQuery-safe column name.
bq_column_names = {
    # Timestamp column.
    'Date Time': 'Date_Time',

    # Columns whose names carry a unit in parentheses.
    'p (mbar)': 'p__mbar',
    'T (degC)': 'T__degC',
    'Tpot (K)': 'Tpot__K',
    'Tdew (degC)': 'Tdew__degC',
    'rh (%)': 'rh__percent',
    'VPmax (mbar)': 'VPmax__mbar',
    'VPact (mbar)': 'VPact__mbar',
    'VPdef (mbar)': 'VPdef__mbar',
    'sh (g/kg)': 'sh__g_per_kg',
    'H2OC (mmol/mol)': 'H2OC__mmol_per_mol',
    'rho (g/m**3)': 'rho__gm_per_cubic_m',
    'wv (m/s)': 'wv__m_per_s',
    # NOTE(review): 'max_w__vm_per_s' looks like a typo for
    # 'max_wv__m_per_s'; kept as-is because readers of the BigQuery table
    # may already depend on this column name -- confirm before changing.
    'max. wv (m/s)': 'max_w__vm_per_s',
    'wd (deg)': 'wd__deg',

    # Columns whose names only contain spaces.
    'max Wx': 'max_Wx',
    'max Wy': 'max_Wy',
    'Day sin': 'Day_sin',
    'Day cos': 'Day_cos',
    'Year sin': 'Year_sin',
    'Year cos': 'Year_cos',
}

# Keys that do not match a column in `df` are ignored by rename()
# (default errors='ignore').
df.rename(columns=bq_column_names, inplace=True)

# Write to BQ
# Use the notebook-level `location` setting instead of repeating the region
# literal, so the client follows the configuration cell.
client = bigquery.Client(location=location, project=project_id)
print("Client creating using default project: {}".format(client.project))

# Fetch the destination dataset, creating it on first use.
try:
    dataset = client.get_dataset(dataset_id)  # Make an API request.
    print("Dataset {} already exists".format(dataset_id))
except NotFound:
    print("Dataset {} is not found, Creating..".format(dataset_id))
    dataset = client.create_dataset(dataset_id)

table_ref = dataset.table(table_id)

# Load-job settings. `destination_table_description` is a free-text table
# description, not the destination itself, so the table reference must not
# be passed there; the destination is given to the load call directly.
# WRITE_TRUNCATE overwrites the table if it already exists. This config only
# takes effect when passed as `job_config=` to the load call.
job_config = bigquery.LoadJobConfig(
    autodetect=True,
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)

#job = client.load_table_from_dataframe(df, table_ref, location="us-central1")
#job.result()  # Waits for table load to complete.
#print("Loaded dataframe to {}".format(table_ref.path))

#return table_ref.path

Client creating using default project: pytorch-tpu-nfs
Dataset view_dataset already exists


In [15]:
# Load the prepared DataFrame into the BigQuery table.
# An explicit WRITE_TRUNCATE config is passed so that re-running this cell
# replaces the table contents; without a job config the load uses the
# default disposition and appends, duplicating every row on each re-run.
load_job_config = bigquery.LoadJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)
job = client.load_table_from_dataframe(
    df,
    table_ref,
    job_config=load_job_config,
    location="us-central1",
)
job.result()  # Waits for table load to complete.
print("Loaded dataframe to {}".format(table_ref.path))

# Display the fully-qualified path of the loaded table.
table_ref.path

Loaded dataframe to /projects/pytorch-tpu-nfs/datasets/view_dataset/tables/weather_time_series


'/projects/pytorch-tpu-nfs/datasets/view_dataset/tables/weather_time_series'

In [2]:
# Run name used for experiment tracking; encodes the window size.
run_id = f'context-window-{context_window}'
from view_demo.train.custom_tf_trainer import trainer

# Metrics are only logged when a staging bucket is configured.
experiment_tracking_on = staging_bucket is not None

# Note: this rebinds `trainer` from the imported callable to the object it
# returns; the import above restores it whenever the cell is re-run.
trainer = trainer(project_id, location, dataset_id, table_id)
trainer.read_dataset()
train_df, val_df, test_df = trainer.create_split()

# NOTE(review): `WindowGenerator` and `linear` are neither defined nor
# imported in the visible cells, and the recorded output of this cell is
# "NameError: name 'WindowGenerator' is not defined". They need to be
# imported/defined before this cell can run on a fresh kernel.
wide_window = WindowGenerator(
    input_width=context_window,
    label_width=context_window,
    shift=1,
    label_columns=['T__degC'],
    train_df=train_df,
    val_df=val_df,
    test_df=test_df,
)
history = trainer.compile_and_fit(linear, wide_window)

if experiment_tracking_on:
    from google.cloud import aiplatform

    aiplatform.init(
        experiment=experiment_id,
        project=project_id,
        staging_bucket=staging_bucket,
    )
    aiplatform.start_run(run=run_id)

    # (logged metric name, key in history.history); the last recorded value
    # of each series is logged, one call per metric, in this order.
    for metric_name, history_key in (
        ("val_loss", 'val_loss'),
        ("val_mae", 'val_mean_absolute_error'),
        ("train_loss", 'loss'),
        ("train_mae", 'mean_absolute_error'),
    ):
        aiplatform.log_metrics({metric_name: history.history[history_key][-1]})

NameError: name 'WindowGenerator' is not defined

In [58]:
# Dump every recorded metric series, then log the last recorded training
# loss and MAE (one log_metrics call per metric, in this order).
print(history.history)
for metric_name, history_key in (
    ("train_loss", 'loss'),
    ("train_mae", 'mean_absolute_error'),
):
    aiplatform.log_metrics({metric_name: history.history[history_key][-1]})

{'loss': [12.012874603271484, 12.001683235168457], 'mean_absolute_error': [1.9278665781021118, 1.924259901046753], 'val_loss': [5.564371585845947, 5.717701435089111], 'val_mean_absolute_error': [1.4334169626235962, 1.4232121706008911]}


In [26]:
# Point the SDK at the configured project, region and experiment, then pull
# the experiment's runs into a DataFrame and display it.
aiplatform.init(
    experiment=experiment_id,
    location=location,
    project=project_id,
)
metrics_df = aiplatform.get_experiment_df()
metrics_df

Unnamed: 0,experiment_name,run_name,metric.val_mae,metric.val_loss
0,weather-exp,window-24,1.422686,5.575446


In [59]:
# Fetch the runs of the 'weather-exp' experiment by name (independent of
# whatever experiment aiplatform.init() was last called with).
exp_df = aiplatform.get_experiment_df(
    experiment='weather-exp',
)

In [74]:
# Validation MAE from the last row whose run name matches the current run.
run_rows = exp_df['run_name'] == run_id
exp_df.loc[run_rows, 'metric.val_mae'].values[-1]
#.at[0,'metric.val_mae']
    
    #['metric.val_mae'] > 1.5).any():
    #print("HIT")

1.4232121706008911

In [25]:
#from google.cloud import aiplatform
#aiplatform.init(project=project_id, staging_bucket=staging_bucket, experiment=experiment_id)
# Log the last recorded validation loss and MAE (one log_metrics call per
# metric, in this order).
for metric_name, history_key in (
    ("val_loss", 'val_loss'),
    ("val_mae", 'val_mean_absolute_error'),
):
    aiplatform.log_metrics({metric_name: history.history[history_key][-1]})



In [4]:
!python3 -m pip install google-cloud-aiplatform --upgrade

Collecting google-cloud-aiplatform
  Downloading google_cloud_aiplatform-1.0.1-py2.py3-none-any.whl (1.8 MB)
[K     |████████████████████████████████| 1.8 MB 8.4 MB/s eta 0:00:01
Collecting google-cloud-storage<2.0.0dev,>=1.32.0
  Downloading google_cloud_storage-1.38.0-py2.py3-none-any.whl (103 kB)
[K     |████████████████████████████████| 103 kB 41.8 MB/s eta 0:00:01
Installing collected packages: google-cloud-storage, google-cloud-aiplatform
  Attempting uninstall: google-cloud-storage
    Found existing installation: google-cloud-storage 1.30.0
    Uninstalling google-cloud-storage-1.30.0:
      Successfully uninstalled google-cloud-storage-1.30.0
  Attempting uninstall: google-cloud-aiplatform
    Found existing installation: google-cloud-aiplatform 0.6.0
    Uninstalling google-cloud-aiplatform-0.6.0:
      Successfully uninstalled google-cloud-aiplatform-0.6.0
[31mERROR: After October 2020 you may experience errors when installing or updating packages. This is because pip wil

In [4]:
# --- Google Cloud project and region ---
project_id = 'pytorch-tpu-nfs'
location = 'us-central1'

# --- Experiment tracking ---
staging_bucket = 'gs://automl-samples'
experiment_prefix = 'weather-exp'

# --- Model input window and the run name derived from it ---
context_window = 24
run_id = f'context-window-{context_window}'


In [5]:
from google.cloud import aiplatform
from datetime import datetime

# Create an experiment tag: the configured prefix followed by the current
# local time, formatted as YYYYmmddHHMMSS.
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")
experiment_id = f"{experiment_prefix}{TIMESTAMP}"

# Init AI Platform with the freshly tagged experiment.
aiplatform.init(
    experiment=experiment_id,
    project=project_id,
    staging_bucket=staging_bucket,
)

# Define the custom training job: a training container plus the serving
# container image to attach to the resulting model.
job = aiplatform.CustomContainerTrainingJob(
    container_uri='gcr.io/pytorch-tpu-nfs/test-custom-trainer:latest',
    display_name="view-training",
    model_serving_container_image_uri="gcr.io/cloud-aiplatform/prediction/tf2-cpu.2-2:latest",
)

# Command-line flags handed to the training container.
training_args = [
    f'--experiment-id={experiment_id}',
    f'--staging-bucket={staging_bucket}',
    f'--context-window={context_window}',
]

# NOTE(review): the recorded output of this cell (a TypeError about an
# unexpected `tensorboard` keyword) does not correspond to the call below,
# which passes no such argument; the saved output appears to be stale.
model = job.run(
    args=training_args,
    model_display_name="temp-prediction",
    replica_count=1,
)

# Show the metrics recorded for this experiment.
metrics_df = aiplatform.get_experiment_df(experiment_id)
print(metrics_df)

# Keep a second handle on the model for the inspection cells.
out_model = model

INFO:root:Resource weather-exp20210603231244 not found.
INFO:root:Creating Resource weather-exp20210603231244


TypeError: run() got an unexpected keyword argument 'tensorboard'

In [30]:
# Scratch check: list the directory two levels above the notebook's working
# directory. Depends on where the kernel was started; not needed for the
# analysis itself.
!ls ../../

build  google-cloud-aiplatform-0.6.0.tar.gz  setup.py	view_demo.egg-info
docs   README.md			     view_demo


In [16]:
import os
# Read AIP_MODEL_DIR from the environment, falling back to the placeholder
# "123" when the variable is not set.
os.getenv('AIP_MODEL_DIR', "123")

'123'

In [18]:
# Display the model handle saved by the training cell (its repr shows the
# resource name).
out_model

<google.cloud.aiplatform.models.Model object at 0x7fb381e09950> 
resource name: projects/64701051322/locations/us-central1/models/1968055444974862336

In [19]:
# Debugging aid: list every attribute of the model object. The output is
# long; drop this cell (or clear its output) before sharing the notebook.
dir(out_model)

['_FutureManager__latest_future',
 '_FutureManager__latest_future_lock',
 '__abstractmethods__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_are_futures_done',
 '_complete_future',
 '_construct_sdk_resource_from_gapic',
 '_delete_method',
 '_deploy',
 '_empty_constructor',
 '_exception',
 '_gca_resource',
 '_get_and_validate_project_location',
 '_get_gca_resource',
 '_getter_method',
 '_instantiate_client',
 '_is_client_prediction_client',
 '_latest_future',
 '_list',
 '_list_method',
 '_list_with_local_order',
 '_raise_future_exception',
 '_resource_noun',
 '_submit',
 '_sync_gca_resource',
 '_sync_object_with_future_result',
 '_wait_on_export',
 '

In [26]:
# Show the model's `uri` attribute (a gs:// path in the recorded output).
out_model.uri

'gs://automl-samples/aiplatform-custom-training-2021-06-02-00:39:59.034/model'

In [21]:
from google_cloud_pipeline_components import aiplatform as gcc_aip

AttributeError: module 'google.cloud.aiplatform.base' has no attribute 'AiPlatformResourceNoun'

In [22]:
from google_cloud_pipeline_components import aiplatform as gcc_aip

AttributeError: module 'google.cloud.aiplatform.base' has no attribute 'AiPlatformResourceNoun'