# Basic Usage of Flux



## Introduction

In [10]:
from flux import Flux
from flux import MemoryDataset

In [27]:
# Create the Flux instance that the following cells register datasets and nodes on.
flux = Flux()

### Adding Datasets

The most fundamental dataset is the MemoryDataset. This object is just a
wrapper around Python objects. You can store any object inside it and it will be
saved in memory.

To view all datasets in the current Flux, you can access the catalog
property.

In [28]:
from flux import MemoryDataset

# Wrap the integer 1 in an in-memory dataset with a human-readable description.
input_ds = MemoryDataset(
    data=1,
    description='input data',
)

# Register the dataset in the Flux catalog under the name 'input'.
flux.add_dataset(
    name='input',
    dataset=input_ds,
)

2022-09-06 17:00:19,219 - flux.data.catalog - INFO - Adding dataset: input


In [29]:
# Display the datasets currently registered in the catalog.
flux.catalog

{'input': MemoryDataset(description='input data', _data='1')}

### Adding Nodes

Adding nodes is as simple as passing a python function, its inputs and its expected outputs.

The inputs are required to be available in the flux catalog in order for the node to execute successfully.

On the other hand, outputs are created dynamically as MemoryDatasets if they are not in the catalog.


To view all nodes inside the current flux object, you can access the pipeline property.

In [30]:

def add_number(input, number=0):
    """Add ``number`` to ``input`` and return the result.

    Args:
        input: Left operand of the addition.
        number: Right operand of the addition. Defaults to 0.

    Returns:
        The value of ``input + number``.
    """
    result = input + number
    return result

# Register add_number as a pipeline node: it reads the 'input' dataset,
# writes its result to 'sum_output', and receives number=10 as an extra keyword argument.
flux.add_node(
    func=add_number,
    inputs='input', outputs='sum_output',
    func_kwargs=dict(number=10),
)

def multiply_number(input, number=1):
    """Multiply ``input`` by ``number`` and return the result.

    Args:
        input: Left operand of the multiplication.
        number: Right operand of the multiplication. Defaults to 1.

    Returns:
        The value of ``input * number``.
    """
    result = input * number
    return result

# Register multiply_number as the second node: it reads 'sum_output' (produced by
# add_number), writes 'output', and receives number=2 as an extra keyword argument.
flux.add_node(
    func=multiply_number,
    inputs='sum_output', outputs='output',
    func_kwargs=dict(number=2),
)

2022-09-06 17:00:20,148 - flux.pipeline.pipeline - INFO - Node add_number added to Pipeline 
2022-09-06 17:00:20,151 - flux.pipeline.pipeline - INFO - Node multiply_number added to Pipeline 


In [31]:
# Display the nodes registered in the pipeline, with their inputs and outputs.
flux.pipeline

Pipeline 
 - Node: add_number([input]) -> [sum_output]
- Node: multiply_number([sum_output]) -> [output]

### Running Flux

In [32]:
# Execute the pipeline; the log below shows add_number running before multiply_number.
flux.run()

2022-09-06 17:00:21,856 - flux.data.datasets - INFO - Loading MemoryDataset(description='input data', _data='1')
2022-09-06 17:00:21,857 - flux.pipeline.node - INFO - Running Node add_number
2022-09-06 17:00:21,858 - flux.data.catalog - INFO - Saving dataset: sum_output
2022-09-06 17:00:21,859 - flux.data.catalog - INFO - Adding dataset: sum_output
2022-09-06 17:00:21,861 - flux.data.datasets - INFO - Loading MemoryDataset(_data='11')
2022-09-06 17:00:21,863 - flux.pipeline.node - INFO - Running Node multiply_number
2022-09-06 17:00:21,865 - flux.data.catalog - INFO - Saving dataset: output
2022-09-06 17:00:21,866 - flux.data.catalog - INFO - Adding dataset: output
2022-09-06 17:00:21,867 - flux.data.catalog - INFO - Removing dataset: sum_output


In [33]:
# Inspect the catalog after the run: 'output' has been added, while the
# intermediate 'sum_output' dataset was removed (see the last line of the run log).
flux.catalog

{'input': MemoryDataset(description='input data', _data='1'),
 'output': MemoryDataset(_data='22')}

### Accessing Results

In [34]:
# Load the value stored in the 'output' dataset: (1 + 10) * 2 = 22.
flux.load_dataset(name='output')

2022-09-06 17:00:33,822 - flux.data.datasets - INFO - Loading MemoryDataset(_data='22')


22

### Saving Flux

In [20]:
# Persist this Flux to disk. The next section loads '../data/basic_flux.pkl',
# so save() presumably appends the '.pkl' extension — TODO confirm.
flux.save('../data/basic_flux')

### Loading Flux

In [6]:
# Create a new Flux and restore the previously saved state into it.
# NOTE(review): the '.pkl' extension suggests load() unpickles the file — if so,
# only load files from trusted sources (unpickling can execute arbitrary code). TODO confirm.
new_flux = Flux()
new_flux.load('../data/basic_flux.pkl')

In [7]:
# Display the catalog of the loaded Flux (empty in the output below).
new_flux.catalog

{}

In [8]:
# Display the pipeline of the loaded Flux; the output below lists both nodes.
new_flux.pipeline

Pipeline 
 - Node: add_number([input]) -> [sum_output]
- Node: multiply_number([sum_output]) -> [output]

In [11]:
# The loaded catalog is empty, so the 'input' dataset that the add_number node
# reads must be registered again (here with the value 2) before the pipeline can run.
new_flux.add_dataset(name='input', dataset=MemoryDataset(data=2))

In [12]:
# Execute the loaded pipeline against the newly registered 'input' dataset.
new_flux.run()

### Overriding Datasets

In [10]:
# Rebind `flux` to a new Flux instance for this example.
flux = Flux()

# Wrap the integer 1 in an in-memory dataset and register it as 'input'.
input_ds = MemoryDataset(
    data=1,
    description='input data',
)
flux.add_dataset(
    name='input',
    dataset=input_ds,
)

def add_number(input, number=0):
    """Return the sum of ``input`` and ``number``.

    Args:
        input: Left operand of the addition.
        number: Right operand of the addition. Defaults to 0.

    Returns:
        The value of ``input + number``.
    """
    total = input + number
    return total

# First node: reads 'input', adds 10, and writes the result to 'interim'.
flux.add_node(
    name='add_number', func=add_number,
    inputs='input', outputs='interim',
    func_kwargs=dict(number=10),
)

def multiply_number(input, number=1):
    """Return the product of ``input`` and ``number``.

    Args:
        input: Left operand of the multiplication.
        number: Right operand of the multiplication. Defaults to 1.

    Returns:
        The value of ``input * number``.
    """
    product = input * number
    return product

# Second node: reads AND writes 'interim' — using the same dataset name for input
# and output makes this node's result override the value saved by add_number.
flux.add_node(
    name='multiply_number', func=multiply_number,
    inputs='interim', outputs='interim',
    func_kwargs=dict(number=2),
)

# Third node: reuses add_number under a distinct node name, reading the
# overridden 'interim' value, adding 2, and writing the result to 'output'.
flux.add_node(
    name='add_number_2', func=add_number,
    inputs='interim', outputs='output',
    func_kwargs=dict(number=2),
)

2022-09-06 16:38:51,956 - flux.data.catalog - INFO - Adding dataset: input
2022-09-06 16:38:51,957 - flux.pipeline.pipeline - INFO - Node add_number added to Pipeline 
2022-09-06 16:38:51,958 - flux.pipeline.pipeline - INFO - Node multiply_number added to Pipeline 
2022-09-06 16:38:51,959 - flux.pipeline.pipeline - INFO - Node add_number_2 added to Pipeline 


In [11]:
# Calling the Flux object directly executes the pipeline; judging by the log
# below it behaves like flux.run() — TODO confirm they are equivalent.
flux()

2022-09-06 16:38:53,173 - flux.data.datasets - INFO - Loading MemoryDataset(description='input data', _data='1')
2022-09-06 16:38:53,174 - flux.pipeline.node - INFO - Running Node add_number
2022-09-06 16:38:53,175 - flux.data.catalog - INFO - Saving dataset: interim
2022-09-06 16:38:53,176 - flux.data.catalog - INFO - Adding dataset: interim
2022-09-06 16:38:53,177 - flux.data.datasets - INFO - Loading MemoryDataset(_data='11')
2022-09-06 16:38:53,178 - flux.pipeline.node - INFO - Running Node multiply_number
2022-09-06 16:38:53,178 - flux.data.catalog - INFO - Saving dataset: interim
2022-09-06 16:38:53,179 - flux.data.datasets - INFO - Saving MemoryDataset(_data='11')
2022-09-06 16:38:53,180 - flux.data.datasets - INFO - Loading MemoryDataset(_data='22')
2022-09-06 16:38:53,180 - flux.pipeline.node - INFO - Running Node add_number_2
2022-09-06 16:38:53,181 - flux.data.catalog - INFO - Saving dataset: output
2022-09-06 16:38:53,181 - flux.data.catalog - INFO - Adding dataset: output
