In [None]:
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Feedback or issues?

For any feedback or questions, please open an [issue](https://github.com/googleapis/python-aiplatform/issues).

# Vertex SDK for Python: AutoML Text Classification Example
To use this Jupyter notebook, copy the notebook to a Google Cloud Notebooks instance with TensorFlow installed and open it. You can run each step, or cell, and see its results. To run a cell, use Shift+Enter. Jupyter automatically displays the return value of the last line in each cell. For more information about running notebooks in Google Cloud Notebooks, see the [Google Cloud Notebooks guide](https://cloud.google.com/vertex-ai/docs/general/notebooks).


This notebook demonstrates how to create an AutoML Text Classification Model with a Vertex AI text dataset, and how to serve the model for online prediction.

Note: you may incur charges for training, prediction, storage or usage of other GCP products in connection with testing this SDK

### Install Vertex SDK for Python


After the SDK installation the kernel will be automatically restarted.

In [None]:
!pip3 uninstall -y google-cloud-aiplatform
# pip3 install google-cloud-aiplatform
import IPython

app = IPython.Application.instance()
app.kernel.do_shutdown(True)

In [None]:
import sys

# Authenticate only when the google.colab package is already loaded in this
# interpreter; in any other environment this cell does nothing.
if "google.colab" in sys.modules:
    from google.colab import auth

    # Ask the Colab runtime to authenticate the current user.
    auth.authenticate_user()

### Enter Your Project and GCS Bucket

Enter your project ID and the Cloud Storage staging bucket in the cell below, then run the cell. These values are passed to the Vertex SDK when it is initialized, so that all the commands in this notebook use the right project.

In [None]:
# Replace both placeholders with your own values before running the notebook.
# MY_PROJECT is the Google Cloud project ID handed to aiplatform.init().
MY_PROJECT = "YOUR PROJECT ID"
MY_STAGING_BUCKET = "gs://YOUR BUCKET"  # staging bucket; should be in the same region as Vertex AI (formerly "ucaip")

## Initialize Vertex SDK for Python

Initialize the *client* for Vertex AI.

In [None]:
from google.cloud import aiplatform

# Configure the SDK once with the project and staging bucket chosen above;
# the SDK calls in the rest of the notebook rely on this initialization.
aiplatform.init(
    staging_bucket=MY_STAGING_BUCKET,
    project=MY_PROJECT,
)

## Create a Dataset on Vertex AI

We will now create a Vertex AI text dataset using the previously prepared csv files. Choose one of the options below. 

### The HappyDB dataset
To create a single-label classification model, we will use the "happy moments" dataset derived from the Kaggle open-source dataset HappyDB. The resulting model classifies happy moments into categories reflecting the causes of happiness.

The goal of the corpus is to advance the understanding of the causes of happiness through text-based reflection.

Please reference [AutoML Documentation](https://cloud.google.com/natural-language/automl/docs/quickstart#model_objectives) for more information.

In [None]:
# Text Classification
# Cloud Storage URI of the "happy moments" CSV that is imported into the
# Vertex AI text dataset.
IMPORT_FILE = "gs://cloud-ml-data/NL-classification/happiness.csv"

In [None]:
# Create a Vertex AI text dataset named "happydb" and import the CSV referenced
# by IMPORT_FILE into it.
# NOTE(review): the import schema used here is the multi-label classification
# schema, while the notebook text describes a single-label model — confirm
# which schema is intended for this data.
ds = aiplatform.TextDataset.create(
    display_name="happydb",
    gcs_source=[IMPORT_FILE],
    import_schema_uri=aiplatform.schema.dataset.ioformat.text.multi_label_classification,
)

# Last expression of the cell: display the dataset's resource name.
ds.resource_name

## Launch a Training Job and Create a Model on Vertex AI

In [None]:
# Define an AutoML text classification training job.
# multi_label=False: this notebook builds a single-label classifier (one
# happiness category per text), and the final cell reports only the label with
# the highest confidence, so the job must not be configured as multi-label.
job = aiplatform.AutoMLTextTrainingJob(
    display_name="train-housing-automl_1",
    prediction_type="classification",
    multi_label=False,
)

# Train on the dataset created above using a 60% / 20% / 20%
# training / validation / test split. The object returned by run() is kept in
# `model` so it can be deployed afterwards.
model = job.run(
    dataset=ds,
    training_fraction_split=0.6,
    validation_fraction_split=0.2,
    test_fraction_split=0.2,
    model_display_name="happy-model",
)

# Deploy Model

In [None]:
# Deploy the trained model so it can serve online predictions. deploy() is
# called without arguments, and its result is kept in `endpoint` for the
# prediction request that follows.
endpoint = model.deploy()

# Predict on Endpoint

In [None]:
# One text instance to classify; each instance is a dict carrying the raw
# text under the "content" key.
instances_list = [
    {"content": "I met my long lost friend"},
]

# Send the instances to the deployed endpoint and display the raw response.
prediction = endpoint.predict(instances=instances_list)
prediction

In [None]:
# Look at the result for the first (and only) instance that was submitted.
prediction_instance = prediction.predictions[0]

# Find the position of the highest confidence score; on ties the earliest
# position wins, and the matching entry of "displayNames" is the label.
confidences = prediction_instance["confidences"]
max_index = max(range(len(confidences)), key=lambda position: confidences[position])
print(prediction_instance["displayNames"][max_index])