# Login

In [None]:
import cengine

# Create the cengine client that every later cell talks through.
# 'USERNAME' / 'PASSWORD' look like placeholders: put in your own credentials.
client = cengine.Client(
    username='USERNAME',
    password='PASSWORD',
)

# Providers

In [1]:
# Settings consumed by the create_provider call in the next cell.
# The values look like placeholders: substitute your own before running.
BUCKET_NAME = 'gs://BUCKET_NAME'  # passed as the provider's 'artifact_store'
SERVICE_ACCOUNT = '/SERVICE_ACCOUNT.json'  # passed as 'service_account'
PROVIDER_NAME = 'PROVIDER_NAME'  # passed as the provider's name

In [None]:
# Register a 'gcp' provider from the settings defined in the previous cell.
# my_provider.id is reused when creating the workspace and the datasource.
my_provider = client.create_provider(
    name=PROVIDER_NAME,
    provider_type='gcp',
    args={
        'service_account': SERVICE_ACCOUNT,
        'artifact_store': BUCKET_NAME,
    },
)

# Workspaces

In [None]:
# Create a workspace tied to the provider; the pipelines further down are
# pushed into this workspace via active_workspace.id.
active_workspace = client.create_workspace(
    name="HelloWorkspace",
    provider_id=my_provider.id,
)
print(active_workspace)

# Datasources

In [None]:
# Register a tabular datasource (source 'bq') that points at the
# census_adult_income table in the ml_datasets dataset of the
# bigquery-public-data project, then show what was created.
new_datasource = client.create_datasource(
    name='QuickstartDataset',
    provider_id=my_provider.id,
    source='bq',
    type='tabular',
    args={
        "dataset": "ml_datasets",
        "table": "census_adult_income",
        "project": "bigquery-public-data",
    },
)
print(new_datasource)

In [None]:
# Commit the datasource. The commit's id is needed later to peek at the data,
# to build the pipeline config and to train.
new_datasource_commit = client.commit_datasource(new_datasource.id)
print(new_datasource_commit)

In [None]:
# Peek at the committed data and print whatever the client returns.
sample = client.peek_datasource_commit(
    new_datasource.id,
    new_datasource_commit.id,
)
print(sample)

# Pipelines

In [None]:
from cengine import PipelineConfig

# Start with a template built from the committed datasource
c = PipelineConfig.from_datasource(
    client=client,
    datasource_id=new_datasource.id,
    commit_id=new_datasource_commit.id,
)

# Configure your dataset split: categorize by marital_status, 0.8 / 0.2
c.split.categorize.by = 'marital_status'
c.split.ratio = {'train': 0.8, 'eval': 0.2}

# Configure non-default preprocessing with a built-in method
c.features['education_num'].transform.add_methods(
    {'method': 'compute_and_apply_vocabulary'}
)

# Configure your labels: income_bracket is taken out of the features and
# registered as the label instead
del c.features.income_bracket
c.labels.add(['income_bracket'])

# Configure your evaluation: native_country is taken out of the features and
# used as the evaluation slice
del c.features.native_country
c.evaluator.slices = [['native_country']]
c.evaluator.metrics = ['binary_accuracy']

# Configure your training with a built-in model
# NOTE(review): input_units presumably has to match the number of features
# left after the two deletions above; confirm before changing the feature set.
c.trainer.fn = 'feedforward'
c.trainer.params = {
    'epochs': 25,
    'input_units': 13,
    'output_units': 1,
    'loss': 'binary_crossentropy',
    'metrics': ['binary_accuracy'],
    'batch_size': 16,
    'lr': 0.0005,
}

#### Final config

In [None]:
# Show the assembled pipeline configuration before it is pushed.
print(c)

#### Register and train a pipeline

In [None]:
# Push the config into the active workspace as a named pipeline.
# first_pipeline.id is what the training and inspection calls refer to.
first_pipeline = client.push_pipeline(
    name='QuickStartPipeline',
    config=c,
    workspace_id=active_workspace.id,
)

In [None]:
# Start a training run of the first pipeline on the committed datasource.
# The returned run's id is used by the log, statistics, evaluation and
# download calls below.
first_pipeline_run = client.train_pipeline(
    pipeline_id=first_pipeline.id,
    datasource_commit_id=new_datasource_commit.id,
)

#### Check pipeline status

In [None]:
# Query pipeline status for the active workspace; as the last expression of
# the cell, its result is displayed by the notebook.
client.get_pipeline_status(workspace_id=active_workspace.id)

In [None]:
# Fetch the logs of the first pipeline's training run.
client.get_pipeline_run_logs(
    pipeline_id=first_pipeline.id,
    pipeline_run_id=first_pipeline_run.id,
)

#### Check the statistics

In [None]:
# Request the statistics of the first run.
# NOTE(review): magic=True presumably enables in-notebook rendering; confirm
# against the client documentation.
client.get_statistics(
    pipeline_id=first_pipeline.id,
    pipeline_run_id=first_pipeline_run.id,
    magic=True,
)

#### See the results

In [None]:
# Open the evaluation results of the first run.
# NOTE(review): magic=True presumably enables in-notebook rendering; confirm
# against the client documentation.
client.evaluate_single_pipeline(
    pipeline_id=first_pipeline.id,
    pipeline_run_id=first_pipeline_run.id,
    magic=True,
)

#### Download the model

In [None]:
import os 

# Download the model of the first run into a 'model' directory under the
# current working directory.
client.download_model(
    pipeline_id=first_pipeline.id,
    pipeline_run_id=first_pipeline_run.id,
    output_path=os.path.join(os.getcwd(), 'model'),
)

In [None]:
# IPython shell escape: recursively list what was written to ./model
!find model

#### Iterate

In [None]:
# Start the next iteration from the configuration of the first pipeline.
second_config = client.pull_pipeline(pipeline_id=first_pipeline.id)

# The only change for this iteration: train with a batch size of 32
# (the first config set 'batch_size' to 16).
second_config.trainer.params['batch_size'] = 32

In [None]:
# Push the modified config as a second pipeline in the same workspace.
second_pipeline = client.push_pipeline(
    name='SecondPipeline',
    config=second_config,
    workspace_id=active_workspace.id,
)

In [None]:
# Train the second pipeline on the same datasource commit as the first run.
second_pipeline_run = client.train_pipeline(
    pipeline_id=second_pipeline.id,
    datasource_commit_id=new_datasource_commit.id)