# Working with child classes

In some scenarios it is necessary to pass an instance of another class to a PyTrack class and work with it there.
The following shows one possible way to handle such a scenario.

In [1]:
# We are working in a temporary directory for easier cleanup
import os
import shutil
from tempfile import TemporaryDirectory
from pytrack import PyTrackProject

# Create the scratch directory and remember where we started from.
temp_dir = TemporaryDirectory()
cwd = os.getcwd()

# Copy this notebook into the scratch directory before changing into it;
# ChildHandler below is declared with nb_name="PassingClasses.ipynb".
# NOTE(review): presumably PyTrack looks the notebook up relative to the
# working directory - confirm.
shutil.copy("PassingClasses.ipynb", temp_dir.name)

os.chdir(temp_dir.name)

# Set up the project inside the scratch directory.
project = PyTrackProject()
project.create_dvc_repository()

In [2]:
from pytrack import PyTrack, DVC
import inspect

class Operation1:
    """
    First example implementation that should be tracked by py-track:
    multiplies the input by a constant factor.

    The constructor argument is stored under the same name (`factor`)
    so that it can be read back from the instance via `getattr`.
    """

    def __init__(self, factor):
        self.factor = factor

    def convert_input(self, inp):
        """Return `inp` multiplied by the stored factor."""
        scaled = inp * self.factor
        return scaled

class Operation2:
    """
    Second example implementation that should be tracked by py-track.
    It serves the same purpose as Operation1 but additionally adds a
    constant shift after scaling.

    Both constructor arguments are stored under their own names
    (`factor`, `shift`).
    """

    def __init__(self, factor, shift):
        self.factor = factor
        self.shift = shift

    def convert_input(self, inp):
        """Return `inp` multiplied by the factor, plus the shift."""
        scaled = inp * self.factor
        return scaled + self.shift


@PyTrack(nb_name="PassingClasses.ipynb")
class ChildHandler:
    """
    PyTrack enabled class that delegates the actual work to one of
    several interchangeable operation classes.

    Usage:
        child_handler = ChildHandler()
        child_handler(operation=Operation1(factor=3), inp=15)
    """

    def __init__(self):
        """
        Register the available operations and declare the DVC
        trackable attributes: the operation name, its constructor
        parameters and the input as params, the output as result.
        """
        # Lookup table "class name -> class"; `run` uses it to rebuild
        # the operation from the stored name.
        self.methods = {cls.__name__: cls for cls in (Operation1, Operation2)}

        self.operation = DVC.params()
        self.operation_parameter = DVC.params()
        self.input = DVC.params()
        self.output = DVC.result()

    def __call__(self, operation: object, inp):
        """
        Configure the handler with an operation instance and an input.

        Only the class name of `operation` and the values of its
        constructor parameters are stored, not the instance itself.
        Calling the class adds it to the computation graph which is
        executed later.

        Parameters
        ----------
        operation:
            Instance of one of the classes registered in `self.methods`.
            Every `__init__` parameter must be available as an attribute
            of the same name on the instance.
        inp:
            Value that is handed to `convert_input` inside `run`.
        """
        operation_cls = operation.__class__
        self.operation = operation_cls.__name__

        # Note: the dict has to be built completely and then assigned
        # with "="; updating the tracked attribute iteratively does not work!
        self.operation_parameter = {
            key: getattr(operation, key)
            for key in inspect.signature(operation_cls.__init__).parameters
            if key != "self"
        }

        self.input = inp

    def run(self):
        """
        Called on the backend to execute the selected child class.

        The operation is re-created from its stored name and parameters
        and its `convert_input` method is applied to the stored input.
        """
        operation_cls = self.methods[self.operation]
        self.output = operation_cls(**self.operation_parameter).convert_input(self.input)

Jupyter support is an experimental feature! Please save your notebook before running this command!
Submit issues to https://github.com/zincware/py-track.


We are using `inspect.signature` and assume that the instance attributes have the same names as the `__init__` parameters.
They can be changed in any way, though.

Let us now pass both child operations to the handler and see how it works out.

In [3]:
# First experiment: scale the input 15 by a factor of 3.
child_handler = ChildHandler()
operation = Operation1(factor=3)
child_handler(operation=operation, inp=15)

# NOTE(review): presumably registers the current configuration under the
# name "Op1" - confirm against the PyTrackProject documentation.
project.queue(name="Op1")

# Second experiment: same input, but scaled by 2 and shifted by 500.
# A fresh handler instance is created; the names are reused on purpose.
child_handler = ChildHandler()
operation = Operation2(factor=2, shift=500)
child_handler(operation=operation, inp=15)

project.queue(name="Op2")
# NOTE(review): presumably executes everything queued above - confirm.
project.run_all()

Used mutable type dict for params! Always overwrite the params and don't alter it otherwise! It won't work.
No results found!
--- Writing new DVC file! ---
Overwriting existing configuration!
Result can only be changed within `run` call!
Used mutable type dict for params! Always overwrite the params and don't alter it otherwise! It won't work.
No results found!
--- Writing new DVC file! ---
Overwriting existing configuration!


We can now load each of them and look at the results:

In [8]:
# `child_handler` is still the instance created last (for Op2); after
# loading "Op1" its attributes nevertheless show the Op1 values, as the
# output below demonstrates.
project.load("Op1")
print(child_handler.operation_parameter)
print(child_handler.output)

{'factor': 3}
45


In [9]:
# Switch to the second experiment; the same handler instance now reports
# the Op2 parameters and result.
project.load("Op2")
print(child_handler.operation_parameter)
print(child_handler.output)

{'factor': 2, 'shift': 500}
530


These nested methods can get arbitrarily complicated, having different dependencies and outputs.
In that case it might be useful to have a common parent class and loop over, e.g., a list of dependencies as such:

In [18]:
class Operation:
    """
    Common parent class for operations that declare file dependencies
    and file outputs. Both start out as empty lists and are meant to be
    filled by the child classes.
    """

    def __init__(self):
        # Fresh list objects per instance, nothing is shared between instances.
        self.dependencies = list()
        self.outs = list()

class Operation3(Operation):
    """
    Example operation that depends on three input files and declares
    two output files.
    """

    def __init__(self):
        super().__init__()
        # Replace the empty defaults of the parent class.
        self.dependencies, self.outs = (
            ['File1', 'File2', 'File3'],
            ['Out1.txt', 'Out2.txt'],
        )

@PyTrack()
class ChildHandler:
    """
    PyTrack enabled class that takes its DVC dependencies and outputs
    from the operation instance it is called with.
    """

    def __init__(self):
        """
        Register the available operations and declare the DVC
        trackable attributes (operation name, dependencies, outputs).
        """
        self.methods = {
            Operation3.__name__: Operation3
        }
        self.method = DVC.params()
        self.deps = DVC.deps()
        self.outs = DVC.outs()

    def __call__(self, operation: Operation):
        """
        Configure the handler from an operation instance.

        Parameters
        ----------
        operation:
            Instance of a class registered in `self.methods`; its
            `dependencies` and `outs` lists are copied into the
            tracked attributes.
        """
        # Store the class name so that `run` can look the operation up
        # again; without it `self.methods[self.method]` cannot succeed.
        self.method = operation.__class__.__name__
        # Always assign new lists, appending to the tracked attributes
        # does not work.
        self.deps = list(operation.dependencies)
        self.outs = list(operation.outs)

    def run(self):
        """
        Re-create the selected operation and hand it the output paths
        managed by PyTrack.
        """
        # Work on an instance: assigning `outs` on the class would be
        # shadowed by the attribute every instance sets in `__init__`.
        operation = self.methods[self.method]()
        # this is required because it adapts the paths to the correct directory!
        # It also converts them into pathlib.Path objects.
        operation.outs = self.outs

Now we can pass our custom operation with a list of dependencies and outputs

In [19]:
# Configure the new handler with an Operation3 instance; its dependency
# and output lists are copied into the handler inside `__call__`.
child_handler = ChildHandler()
operation = Operation3()
child_handler(operation)

Used mutable type list for deps! Always overwrite the deps and don't append to it! It won't work.
Used mutable type list for outs! Always overwrite the outs and don't append to it! It won't work.
--- Writing new DVC file! ---
Overwriting existing configuration!


If we now look at the PyTrack dependencies and outputs, we see that they are all set correctly.

In [20]:
# The values read back are path objects rather than the plain strings that
# were assigned, and the outputs carry an "outs/" prefix (see output below).
print(child_handler.deps)
print(child_handler.outs)

[PosixPath('File1'), PosixPath('File2'), PosixPath('File3')]
[PosixPath('outs/Out1.txt'), PosixPath('outs/Out2.txt')]


In [21]:
# Cleanup
# Go back to the directory the notebook was started from before deleting
# the temporary directory. Removing the current working directory fails
# on Windows and leaves the kernel in a non-existent directory elsewhere.
os.chdir(cwd)
temp_dir.cleanup()