In [1]:
import tempfile

tempdir_context = tempfile.TemporaryDirectory()

def cleanup():
    tempdir_context.__exit__(None,None,None)
    
class Demo:
    """parameters for this demo"""
    
    # the profile name under which waylay credentials are stored
    waylay_client_profile='staging'
    
    # a prefix for all resources created by this demo
    resource_prefix = 'demo_etl_import'
    
    temp_dir = tempdir_context.__enter__()
    

In [2]:
import waylay
waylay.__version__

'v0.1.3+14.gdff4198.dirty'

In [3]:
waylay_client = waylay.WaylayClient.from_profile(Demo.waylay_client_profile)
etl_tool = waylay_client.timeseries.etl_tool

# set the default temporary directory for the tool, so that we can cleanup the demo with `cleanup()`
etl_tool.temp_dir = Demo.temp_dir

# conversion of (large) files and streams

The python SDK supports importing large sets of time series using files and streams in a CSV format.

The SDK converts the file to a local ETL Import file and uploads this file to waylay for an asynchronous import.

This section illustrates the various csv import formats that are supported by the SDK (without actually uploading the series)

In [4]:
import pandas as pd
import csv
import io
import gzip

def preview_csv_text(csv_text: str, lines=10):
    with io.StringIO(csv_text) as csv_stream:
        display(pd.read_csv(csv_stream).head(lines))
                
def preview_import_file(path: str, lines=10):
    with gzip.open(path, 'rt') as csv_stream:
        display(pd.read_csv(csv_stream).head(lines))
        

### etl_tool.prepare_import(input, _args_)
Takes a data input and converts it to a local waylay _etl import_ file that can be
uploaded to the waylay system. 

The first argument to `etl_tool.prepare_import` denotes the input data, and can be a
  - (csv) file location (with .gz extension if gzipped)
  - csv text stream, 
  - iterable of string tuples (e.g. a `[('timestamp','value'), (t1,v1), (t2,v2)]`
  - pandas dataframe
In these example we use a csv text stream constructed from the `csv_text` string.

In [5]:
csv_text=(
    "timestamp,temperature\n"
    "2021-03-01T00:00+00:00,-1\n"
    "2021-03-01T03:00+00:00,-2\n"
    "2021-03-01T06:00+00:00,3\n"
    "2021-03-01T09:00+00:00,10\n"
    "2021-03-01T12:00+00:00,15\n"
    "2021-03-01T15:00+00:00,21\n"
    "2021-03-01T18:00+00:00,14\n"
    "2021-03-01T21:00+00:00,8\n"
)
preview_csv_text(csv_text)

Unnamed: 0,timestamp,temperature
0,2021-03-01T00:00+00:00,-1
1,2021-03-01T03:00+00:00,-2
2,2021-03-01T06:00+00:00,3
3,2021-03-01T09:00+00:00,10
4,2021-03-01T12:00+00:00,15
5,2021-03-01T15:00+00:00,21
6,2021-03-01T18:00+00:00,14
7,2021-03-01T21:00+00:00,8


Invoking 
```
   etl_import = prepare_import(input_data, **args)
``` 
provides a `WaylayETLSeriesImport` object that holds references to the local ETL file, and can be passed on to further workflow steps, such as:

* `etl_tool.initiate_import(etl_import)` uploads the local ETL file to the waylay system and kicks off the ingestion process.
* `etl_tool.check_import(etl_import)` checks the current status of the ingestion process.
* `etl_tool.read_import_as_dataframe(etl_import)` creates a Pandas dataframe of the local ETL file, letting you validate its content.

Unless specified otherwise, the tool expects that:
* you specify a `resource` name for the data set as an argument
* the csv data has a `timestamp` column containing ISO8601 timestamps _with_ timezone information.
* each other column contains a series, with the column header as _metric_ name 

In [6]:
etl_import = etl_tool.prepare_import(
    io.StringIO(csv_text),
    resource=f'{Demo.resource_prefix}_01',
)

preview_import_file(etl_import.import_file.path)        

# read the etl file back in as a dataframe
etl_tool.read_import_as_dataframe(etl_import)


100%|██████████| 1.00/1.00 [00:00<00:00, 1.89kseries/s]


Unnamed: 0,resource,metric,timestamp,value
0,demo_etl_import_01,temperature,2021-03-01T00:00:00Z,-1
1,demo_etl_import_01,temperature,2021-03-01T03:00:00Z,-2
2,demo_etl_import_01,temperature,2021-03-01T06:00:00Z,3
3,demo_etl_import_01,temperature,2021-03-01T09:00:00Z,10
4,demo_etl_import_01,temperature,2021-03-01T12:00:00Z,15
5,demo_etl_import_01,temperature,2021-03-01T15:00:00Z,21
6,demo_etl_import_01,temperature,2021-03-01T18:00:00Z,14
7,demo_etl_import_01,temperature,2021-03-01T21:00:00Z,8


resource,demo_etl_import_01
metric,temperature
timestamp,Unnamed: 1_level_2
2021-03-01 00:00:00+00:00,-1.0
2021-03-01 03:00:00+00:00,-2.0
2021-03-01 06:00:00+00:00,3.0
2021-03-01 09:00:00+00:00,10.0
2021-03-01 12:00:00+00:00,15.0
2021-03-01 15:00:00+00:00,21.0
2021-03-01 18:00:00+00:00,14.0
2021-03-01 21:00:00+00:00,8.0


### multiple metric columns

In [7]:
csv_text=(
    "timestamp,temperature,humidity\n"
    "2021-03-01T00:00Z,-1,203\n"
    "2021-03-01T03:00Z,-2,201\n"
    "2021-03-01T06:00Z,3,221\n"
    "2021-03-01T09:00Z,10,223\n"
    "2021-03-01T12:00Z,15,243\n"
    "2021-03-01T15:00Z,21,183\n"
    "2021-03-01T18:00Z,14,203\n"
    "2021-03-01T21:00Z,8,200\n"
)
preview_csv_text(csv_text)
etl_import = etl_tool.prepare_import(
    io.StringIO(csv_text),
    resource=f'{Demo.resource_prefix}_01',
)
preview_import_file(etl_import.import_file.path, 16)  

Unnamed: 0,timestamp,temperature,humidity
0,2021-03-01T00:00Z,-1,203
1,2021-03-01T03:00Z,-2,201
2,2021-03-01T06:00Z,3,221
3,2021-03-01T09:00Z,10,223
4,2021-03-01T12:00Z,15,243
5,2021-03-01T15:00Z,21,183
6,2021-03-01T18:00Z,14,203
7,2021-03-01T21:00Z,8,200


100%|██████████| 2.00/2.00 [00:00<00:00, 5.17kseries/s]


Unnamed: 0,resource,metric,timestamp,value
0,demo_etl_import_01,temperature,2021-03-01T00:00:00Z,-1
1,demo_etl_import_01,temperature,2021-03-01T03:00:00Z,-2
2,demo_etl_import_01,temperature,2021-03-01T06:00:00Z,3
3,demo_etl_import_01,temperature,2021-03-01T09:00:00Z,10
4,demo_etl_import_01,temperature,2021-03-01T12:00:00Z,15
5,demo_etl_import_01,temperature,2021-03-01T15:00:00Z,21
6,demo_etl_import_01,temperature,2021-03-01T18:00:00Z,14
7,demo_etl_import_01,temperature,2021-03-01T21:00:00Z,8
8,demo_etl_import_01,humidity,2021-03-01T00:00:00Z,203
9,demo_etl_import_01,humidity,2021-03-01T03:00:00Z,201


### metric specified in column
The main other layout supported by `etl_tool` is a csv file with a single `value` column, and a `metric` column that indicates to which series each value belongs.
* a `value_column` must be specified
* a `metric_column` can be specified, defaults to the column name `metric`


In [8]:
csv_text=(
    "timestamp,metric,value\n"
    "2021-03-01T00:00Z,temperature,-1\n"
    "2021-03-01T00:00Z,humidity,203\n"
    "2021-03-01T03:00Z,temperature,-2\n"
    "2021-03-01T03:00Z,humidity,201\n"
    "2021-03-01T06:00Z,temperature,3\n"
    "2021-03-01T06:00Z,humidity,221\n"
    "2021-03-01T09:00Z,temperature,10\n"
    "2021-03-01T09:00Z,humidity,223\n"
    "2021-03-01T12:00Z,temperature,15\n"
    "2021-03-01T12:00Z,humidity,243\n"
    "2021-03-01T15:00Z,temperature,21\n"
    "2021-03-01T15:00Z,humidity,183\n"
    "2021-03-01T18:00Z,temperature,14\n"
    "2021-03-01T18:00Z,humidity,203\n"
    "2021-03-01T21:00Z,temperature,8\n"
    "2021-03-01T21:00Z,humidity,200\n"
)
preview_csv_text(csv_text)
etl_import = etl_tool.prepare_import(
    io.StringIO(csv_text),
    resource=f'{Demo.resource_prefix}_01',
    value_column='value'
)
preview_import_file(etl_import.import_file.path, 100)  

Unnamed: 0,timestamp,metric,value
0,2021-03-01T00:00Z,temperature,-1
1,2021-03-01T00:00Z,humidity,203
2,2021-03-01T03:00Z,temperature,-2
3,2021-03-01T03:00Z,humidity,201
4,2021-03-01T06:00Z,temperature,3
5,2021-03-01T06:00Z,humidity,221
6,2021-03-01T09:00Z,temperature,10
7,2021-03-01T09:00Z,humidity,223
8,2021-03-01T12:00Z,temperature,15
9,2021-03-01T12:00Z,humidity,243


100%|██████████| 2.00/2.00 [00:00<00:00, 5.22kseries/s]


Unnamed: 0,resource,metric,timestamp,value
0,demo_etl_import_01,temperature,2021-03-01T00:00:00Z,-1
1,demo_etl_import_01,temperature,2021-03-01T03:00:00Z,-2
2,demo_etl_import_01,temperature,2021-03-01T06:00:00Z,3
3,demo_etl_import_01,temperature,2021-03-01T09:00:00Z,10
4,demo_etl_import_01,temperature,2021-03-01T12:00:00Z,15
5,demo_etl_import_01,temperature,2021-03-01T15:00:00Z,21
6,demo_etl_import_01,temperature,2021-03-01T18:00:00Z,14
7,demo_etl_import_01,temperature,2021-03-01T21:00:00Z,8
8,demo_etl_import_01,humidity,2021-03-01T00:00:00Z,203
9,demo_etl_import_01,humidity,2021-03-01T03:00:00Z,201


### resource in column
Rather than a fixed value for the whole dataset, a _resource_ column can indicate the Waylay resource that 
each data entry belongs to. 

If `resource` is not specified, the tool expects a column named `resource`, or expects you to specify a `resource_column`.

In [9]:
csv_text=(
    "time,dataset,measure,observed\n"
    "2021-03-01T00:00Z,r1,temperature,-1\n"
    "2021-03-01T00:00Z,r1,humidity,203\n"
    "2021-03-01T03:00Z,r2,temperature,-2\n"
    "2021-03-01T03:00Z,r2,humidity,201\n"
    "2021-03-01T06:00Z,r1,temperature,3\n"
    "2021-03-01T06:00Z,r1,humidity,221\n"
    "2021-03-01T09:00Z,r2,temperature,10\n"
    "2021-03-01T09:00Z,r2,humidity,223\n"
    "2021-03-01T12:00Z,r1,temperature,15\n"
    "2021-03-01T12:00Z,r1,humidity,243\n"
    "2021-03-01T15:00Z,r2,temperature,21\n"
    "2021-03-01T15:00Z,r2,humidity,183\n"
    "2021-03-01T18:00Z,r1,temperature,14\n"
    "2021-03-01T18:00Z,r1,humidity,203\n"
    "2021-03-01T21:00Z,r2,temperature,8\n"
    "2021-03-01T21:00Z,r2,humidity,200\n"
)
preview_csv_text(csv_text)
etl_import = etl_tool.prepare_import(
    io.StringIO(csv_text),
    timestamp_column='time',
    resource_column='dataset',
    metric_column='measure',
    value_column='observed'
)
preview_import_file(etl_import.import_file.path, 100)  

Unnamed: 0,time,dataset,measure,observed
0,2021-03-01T00:00Z,r1,temperature,-1
1,2021-03-01T00:00Z,r1,humidity,203
2,2021-03-01T03:00Z,r2,temperature,-2
3,2021-03-01T03:00Z,r2,humidity,201
4,2021-03-01T06:00Z,r1,temperature,3
5,2021-03-01T06:00Z,r1,humidity,221
6,2021-03-01T09:00Z,r2,temperature,10
7,2021-03-01T09:00Z,r2,humidity,223
8,2021-03-01T12:00Z,r1,temperature,15
9,2021-03-01T12:00Z,r1,humidity,243


100%|██████████| 4.00/4.00 [00:00<00:00, 12.0kseries/s]


Unnamed: 0,resource,metric,timestamp,value
0,r1,temperature,2021-03-01T00:00:00Z,-1
1,r1,temperature,2021-03-01T06:00:00Z,3
2,r1,temperature,2021-03-01T12:00:00Z,15
3,r1,temperature,2021-03-01T18:00:00Z,14
4,r1,humidity,2021-03-01T00:00:00Z,203
5,r1,humidity,2021-03-01T06:00:00Z,221
6,r1,humidity,2021-03-01T12:00:00Z,243
7,r1,humidity,2021-03-01T18:00:00Z,203
8,r2,temperature,2021-03-01T03:00:00Z,-2
9,r2,temperature,2021-03-01T09:00:00Z,10


### ETL import/export csv files
As a special rule, any csv file with headers 
> `timestamp`,`resource`,`metric`,`value`

(in any order) will be recognised as to have series values in the `value` column.
They do not require parameter configuration.
This is the format of the ETL import/export csv files.

In [10]:
csv_text=(
    "timestamp,resource,metric,value\n"
    "2021-03-01T00:00Z,r1,temperature,-1\n"
    "2021-03-01T00:00Z,r1,humidity,203\n"
    "2021-03-01T03:00Z,r2,temperature,-2\n"
    "2021-03-01T03:00Z,r2,humidity,201\n"
    "2021-03-01T06:00Z,r1,temperature,3\n"
    "2021-03-01T06:00Z,r1,humidity,221\n"
    "2021-03-01T09:00Z,r2,temperature,10\n"
    "2021-03-01T09:00Z,r2,humidity,223\n"
    "2021-03-01T12:00Z,r1,temperature,15\n"
    "2021-03-01T12:00Z,r1,humidity,243\n"
    "2021-03-01T15:00Z,r2,temperature,21\n"
    "2021-03-01T15:00Z,r2,humidity,183\n"
    "2021-03-01T18:00Z,r1,temperature,14\n"
    "2021-03-01T18:00Z,r1,humidity,203\n"
    "2021-03-01T21:00Z,r2,temperature,8\n"
    "2021-03-01T21:00Z,r2,humidity,200\n"
)
preview_csv_text(csv_text)
etl_import = etl_tool.prepare_import(io.StringIO(csv_text))
preview_import_file(etl_import.import_file.path, 100)  

Unnamed: 0,timestamp,resource,metric,value
0,2021-03-01T00:00Z,r1,temperature,-1
1,2021-03-01T00:00Z,r1,humidity,203
2,2021-03-01T03:00Z,r2,temperature,-2
3,2021-03-01T03:00Z,r2,humidity,201
4,2021-03-01T06:00Z,r1,temperature,3
5,2021-03-01T06:00Z,r1,humidity,221
6,2021-03-01T09:00Z,r2,temperature,10
7,2021-03-01T09:00Z,r2,humidity,223
8,2021-03-01T12:00Z,r1,temperature,15
9,2021-03-01T12:00Z,r1,humidity,243


100%|██████████| 4.00/4.00 [00:00<00:00, 9.31kseries/s]


Unnamed: 0,resource,metric,timestamp,value
0,r1,temperature,2021-03-01T00:00:00Z,-1
1,r1,temperature,2021-03-01T06:00:00Z,3
2,r1,temperature,2021-03-01T12:00:00Z,15
3,r1,temperature,2021-03-01T18:00:00Z,14
4,r1,humidity,2021-03-01T00:00:00Z,203
5,r1,humidity,2021-03-01T06:00:00Z,221
6,r1,humidity,2021-03-01T12:00:00Z,243
7,r1,humidity,2021-03-01T18:00:00Z,203
8,r2,temperature,2021-03-01T03:00:00Z,-2
9,r2,temperature,2021-03-01T09:00:00Z,10


### resource in column, multiple metric columns
When series are in columns, each observation can still be for another resource
by indicating a `resource_column`.

In [11]:
csv_text=(
    "resource,timestamp,temperature,humidity\n"
    "r1,2021-03-01T00:00Z,-1,203\n"
    "r2,2021-03-01T00:00Z,-2,201\n"
    "r1,2021-03-01T06:00Z,3,221\n"
    "r2,2021-03-01T06:00Z,10,223\n"
    "r1,2021-03-01T12:00Z,15,243\n"
    "r2,2021-03-01T12:00Z,21,183\n"
    "r1,2021-03-01T18:00Z,14,203\n"
    "r2,2021-03-01T18:00Z,8,200"
)
preview_csv_text(csv_text)
etl_import = etl_tool.prepare_import(
    io.StringIO(csv_text),
    resource_column='resource',
)
preview_import_file(etl_import.import_file.path, 16)  

Unnamed: 0,resource,timestamp,temperature,humidity
0,r1,2021-03-01T00:00Z,-1,203
1,r2,2021-03-01T00:00Z,-2,201
2,r1,2021-03-01T06:00Z,3,221
3,r2,2021-03-01T06:00Z,10,223
4,r1,2021-03-01T12:00Z,15,243
5,r2,2021-03-01T12:00Z,21,183
6,r1,2021-03-01T18:00Z,14,203
7,r2,2021-03-01T18:00Z,8,200


100%|██████████| 4.00/4.00 [00:00<00:00, 7.39kseries/s]


Unnamed: 0,resource,metric,timestamp,value
0,r1,temperature,2021-03-01T00:00:00Z,-1
1,r1,temperature,2021-03-01T06:00:00Z,3
2,r1,temperature,2021-03-01T12:00:00Z,15
3,r1,temperature,2021-03-01T18:00:00Z,14
4,r1,humidity,2021-03-01T00:00:00Z,203
5,r1,humidity,2021-03-01T06:00:00Z,221
6,r1,humidity,2021-03-01T12:00:00Z,243
7,r1,humidity,2021-03-01T18:00:00Z,203
8,r2,temperature,2021-03-01T00:00:00Z,-2
9,r2,temperature,2021-03-01T06:00:00Z,10


### Filtering resources and metrics

The `resources` and `metrics` parameters allow you to filter the series that are imported.

In [12]:
etl_import = etl_tool.prepare_import(
    io.StringIO(csv_text),
    resource_column='resource',
    resources=['r1'],
    metrics=['temperature']
)
preview_import_file(etl_import.import_file.path, 16)  

100%|██████████| 1.00/1.00 [00:00<00:00, 3.71kseries/s]


Unnamed: 0,resource,metric,timestamp,value
0,r1,temperature,2021-03-01T00:00:00Z,-1
1,r1,temperature,2021-03-01T06:00:00Z,3
2,r1,temperature,2021-03-01T12:00:00Z,15
3,r1,temperature,2021-03-01T18:00:00Z,14


### Mapping _resource_ and _metric_ identifiers
You can map the _resource_ and _metric_ keys used in the data set, to another _metric name_ or _resource id_ for its usage as a Waylay series.

This requires to specify `Resource` objects in the `resources` argument, and `Metric` objects in the `metrics` argument.

The `Metric` class allow you to specify other metadata, such as the `value_type` type hint.

In [13]:
from waylay.service.timeseries.parser import Metric, Resource
etl_import = etl_tool.prepare_import(
    io.StringIO(csv_text),
    resource_column='resource',
    resources=[
        # map `r1` in the input data to the `resource_1` resource id
        Resource('resource_1',key='r1'), 
        # map `r2` in the input data to the `resource_2` resource id
        Resource('resource_2',key='r2')
    ],
    metrics=[
        # map `temperature` in the input data to `temp` metric name
        Metric('temp',key='temperature',unit='°C',value_type='integer')
    ]
)
preview_import_file(etl_import.import_file.path, 16) 

100%|██████████| 2.00/2.00 [00:00<00:00, 6.38kseries/s]


Unnamed: 0,resource,metric,timestamp,value
0,resource_1,temp,2021-03-01T00:00:00Z,-1
1,resource_1,temp,2021-03-01T06:00:00Z,3
2,resource_1,temp,2021-03-01T12:00:00Z,15
3,resource_1,temp,2021-03-01T18:00:00Z,14
4,resource_2,temp,2021-03-01T00:00:00Z,-2
5,resource_2,temp,2021-03-01T06:00:00Z,10
6,resource_2,temp,2021-03-01T12:00:00Z,21
7,resource_2,temp,2021-03-01T18:00:00Z,8


resource,resource_1,resource_2
metric,temp,temp
timestamp,Unnamed: 1_level_2,Unnamed: 2_level_2
2021-03-01 00:00:00+00:00,-1,-2
2021-03-01 06:00:00+00:00,3,10
2021-03-01 12:00:00+00:00,15,21
2021-03-01 18:00:00+00:00,14,8


In [25]:
display(etl_tool.read_import_as_dataframe(etl_import))

for r in etl_tool.list_import_resources(etl_import):
    display(r.to_dict())

resource,resource_1,resource_2
metric,temp,temp
timestamp,Unnamed: 1_level_2,Unnamed: 2_level_2
2021-03-01 00:00:00+00:00,-1,-2
2021-03-01 06:00:00+00:00,3,10
2021-03-01 12:00:00+00:00,15,21
2021-03-01 18:00:00+00:00,14,8


{'id': 'resource_1',
 'key': 'r1',
 'metrics': [{'name': 'temp', 'valueType': 'integer', 'unit': '°C'}]}

{'id': 'resource_2',
 'key': 'r2',
 'metrics': [{'name': 'temp', 'valueType': 'integer', 'unit': '°C'}]}

In [26]:
import sys
class HugeDataSet():
    """An _iterable_ dataset that provides data for a number of resources and metrics."""
    def __init__(self, size:int, resources=4, metrics=12, values=1000):
        self.size=size
        self.resources=resources
        self.metrics=metrics
        self.values=values

    def __iter__(self):
        resources=self.resources
        metrics=self.metrics
        values=self.values
        yield ['timestamp', 'resource', 'metric', 'value']
        for i in range(self.size):
            yield [ pd.Timestamp(i, unit='s'), f'res_{i%resources}', f'metric_{i%metrics}', i % values]



In [27]:
etl_import = etl_tool.prepare_import(
    HugeDataSet(200),
    value_column='value',
    resources=['res_3']
)
preview_import_file(etl_import.import_file.path, 16)  

100%|██████████| 3.00/3.00 [00:00<00:00, 903series/s]


Unnamed: 0,resource,metric,timestamp,value
0,res_3,metric_3,1970-01-01T00:00:03,3
1,res_3,metric_3,1970-01-01T00:00:15,15
2,res_3,metric_3,1970-01-01T00:00:27,27
3,res_3,metric_3,1970-01-01T00:00:39,39
4,res_3,metric_3,1970-01-01T00:00:51,51
5,res_3,metric_3,1970-01-01T00:01:03,63
6,res_3,metric_3,1970-01-01T00:01:15,75
7,res_3,metric_3,1970-01-01T00:01:27,87
8,res_3,metric_3,1970-01-01T00:01:39,99
9,res_3,metric_3,1970-01-01T00:01:51,111


In [28]:
etl_import = etl_tool.prepare_import(
    HugeDataSet(int(10e5))
)
display(str(etl_import.import_file.path))
display(etl_import.import_file.path.stat().st_size)

100%|██████████| 12.0/12.0 [00:27<00:00, 2.33s/series]


'/var/folders/07/zn347xhn33z8m79l8xtz1hn80000gp/T/tmpe8lgg9ws/import-20210224.171349-timeseries.csv.gz'

3641431

In [29]:
for r in etl_tool.list_import_resources(etl_import):
    display(r.to_dict())

{'id': 'res_0',
 'metrics': [{'name': 'metric_0'}, {'name': 'metric_4'}, {'name': 'metric_8'}]}

{'id': 'res_1',
 'metrics': [{'name': 'metric_1'}, {'name': 'metric_5'}, {'name': 'metric_9'}]}

{'id': 'res_2',
 'metrics': [{'name': 'metric_2'},
  {'name': 'metric_6'},
  {'name': 'metric_10'}]}

{'id': 'res_3',
 'metrics': [{'name': 'metric_3'},
  {'name': 'metric_7'},
  {'name': 'metric_11'}]}

In [30]:
etl_import = etl_tool.prepare_import(
    HugeDataSet(int(10e6), resources=100, metrics=2),
    resources=['res_0']
)
display(str(etl_import.import_file.path))
display(etl_import.import_file.path.stat().st_size)

100%|██████████| 1.00/1.00 [00:20<00:00, 20.3s/series]


'/var/folders/07/zn347xhn33z8m79l8xtz1hn80000gp/T/tmpe8lgg9ws/import-20210224.171551-timeseries.csv.gz'

270048

In [31]:
export_name = 'staging.waylay.io-20210222-timeseries.csv.gz'
local_export_file = f'{Demo.temp_dir}/{export_name}'
waylay_client.storage.object.stat('etl-export',export_name)
waylay_client.storage.content.get('etl-export',export_name, to_file=local_export_file)

100%|██████████| 79.8M/79.8M [00:07<00:00, 10.9MB/s]


PosixPath('/var/folders/07/zn347xhn33z8m79l8xtz1hn80000gp/T/tmpe8lgg9ws/staging.waylay.io-20210222-timeseries.csv.gz')

In [32]:
etl_import = etl_tool.prepare_import(
    local_export_file,
    metrics=['activeScore'],
    resources=['thomas@waylay.io@fitbitsimulator.00574']
)

100%|██████████| 1.00/1.00 [00:09<00:00, 9.96s/series]


In [33]:
for r in etl_tool.list_import_resources(etl_import):
    display(r.to_dict())
    
display(etl_tool.read_import_as_dataframe(etl_import))

{'id': 'thomas@waylay.io@fitbitsimulator.00574',
 'metrics': [{'name': 'activeScore'}]}

resource,thomas@waylay.io@fitbitsimulator.00574
metric,activeScore
timestamp,Unnamed: 1_level_2
2021-02-21 14:27:02.391000+00:00,-1.0
2021-02-21 14:48:34.300000+00:00,2.0
2021-02-21 14:49:10.541000+00:00,2.0
2021-02-21 15:44:41.791000+00:00,1.0
2021-02-21 16:02:42.641000+00:00,-1.0
2021-02-21 16:19:57.726000+00:00,-1.0
2021-02-21 16:27:47.142000+00:00,-1.0
2021-02-21 16:46:20.470000+00:00,-1.0
2021-02-21 16:48:38.891000+00:00,2.0
2021-02-21 16:48:49.463000+00:00,-1.0


In [34]:
# removes the temporary directory
cleanup()
