In [19]:
# Imports the packages that are needed
import json
from ydata.connectors import GCSConnector
from ydata.connectors.filetype import FileType
from ydata.utils.formats import read_json

In [20]:
# Set up the YData connector for Google Cloud Storage, using credentials read from a local JSON file.
CREDENTIALS_FILE = 'gcs_credentials.json'
# NOTE(review): the first positional argument is presumably the GCP project id — confirm against GCSConnector.
GCS_PROJECT = 'ydatasynthetic'

token = read_json(CREDENTIALS_FILE)
connector = GCSConnector(GCS_PROJECT, keyfile_dict=token)


+---------+----------------+----------------+----------------+
| Package | client         | scheduler      | workers        |
+---------+----------------+----------------+----------------+
| numpy   | 1.20.3         | 1.19.5         | 1.19.5         |
| python  | 3.7.11.final.0 | 3.7.10.final.0 | 3.7.10.final.0 |
+---------+----------------+----------------+----------------+


In [21]:
# Fetch the CSV file from Cloud Storage through the connector.
DATA_URI = 'gs://ydata_testdata/tabular/mobile_network/data.csv'

data = connector.read_file(
    DATA_URI,
    file_type=FileType.CSV,
)

In [22]:
# Write the data out as a local CSV. In an Elyra pipeline, a step has to output its data
# so that it is available to the next steps.
OUTPUT_CSV = 'data.csv'

final_df = data.to_pandas()
final_df.to_csv(OUTPUT_CSV, index=False)

In [24]:
# Build the table visualization metadata that Kubeflow reads to show some lines of the dataset.
# Only the first PREVIEW_ROWS rows are inlined: the full dataset is already written to data.csv,
# and embedding all of it here would duplicate it inside the metadata JSON.
PREVIEW_ROWS = 20

# NOTE(review): every column is tagged "NUMBER" regardless of its actual dtype;
# only the column names are used for the table header below.
schema = [{"name": c, "type": "NUMBER"} for c in final_df.columns]

metadata = {
    'outputs': [{
        'type': 'table',
        'storage': 'inline',
        'format': 'csv',
        'header': [x['name'] for x in schema],
        # head() returns the whole frame when it has fewer than PREVIEW_ROWS rows.
        'source': final_df.head(PREVIEW_ROWS).to_csv(header=False, index=False),
    }]
}

# `json` comes from the notebook's import cell at the top.
with open("mlpipeline-ui-metadata.json", 'w') as metadata_file:
    json.dump(metadata, metadata_file)