Skip to content

Commit

Permalink
improving Dask translator
Browse files Browse the repository at this point in the history
  • Loading branch information
rafaelfsilva committed Jul 21, 2023
1 parent 4ad4c86 commit 1d9843a
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 29 deletions.
2 changes: 1 addition & 1 deletion wfcommons/common/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def __init__(self,
self.wms_name: Optional[str] = "WfCommons" if not wms_name else wms_name
self.wms_version: Optional[str] = str(__version__) if not wms_version else wms_version
self.wms_url: Optional[str] = f"https://docs.wfcommons.org/en/v{__version__}/" if not wms_url else wms_url
self.executed_at: Optional[str] = datetime.now().astimezone().isoformat()) if not executed_at else executed_at
self.executed_at: Optional[str] = datetime.now().astimezone().isoformat() if not executed_at else executed_at
self.makespan: Optional[int] = makespan
self.tasks = {}
self.tasks_parents = {}
Expand Down
2 changes: 1 addition & 1 deletion wfcommons/wfbench/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def create_benchmark(self,

task.runtime = 0
task.files = []
task.program = f"{this_dir.joinpath('wfbench.py')}"
task.program = "wfbench.py"
task.args = [task.name]
task.args.extend(params)

Expand Down
50 changes: 42 additions & 8 deletions wfcommons/wfbench/translator/abstract_translator.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,27 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2021-2022 The WfCommons Team.
# Copyright (c) 2021-2023 The WfCommons Team.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

import logging
import os
import pathlib
import shutil

from abc import ABC, abstractmethod
from typing import Dict, List, Optional, Union
from typing import Optional, Union

from ...common import Task, Workflow
from ...common import FileLink, Task, Workflow
from ...wfinstances.instance import Instance


this_dir = pathlib.Path(__file__).resolve().parent

class Translator(ABC):
"""
An abstract class of WfFormat parser for creating workflow benchmark applications.
Expand Down Expand Up @@ -69,13 +73,43 @@ def __init__(self,
self.task_children[task['name']].append(child)

@abstractmethod
def translate(self, output_file_path: pathlib.Path) -> None:
def translate(self, output_folder: pathlib.Path) -> None:
"""
Translate a workflow benchmark description (WfFormat) into an actual workflow application.
:param output_file_path: The path of the output file.
:type output_file_path: pathlib.Path
:param output_folder: The path to the folder in which the workflow benchmark will be generated.
:type output_folder: pathlib.Path
"""

def _copy_binary_files(self, output_folder: pathlib.Path) -> None:
"""
Copy binary files to workflow benchmark's bin folder.
:param output_folder: The path to the folder in which the workflow benchmark will be generated.
:type output_folder: pathlib.Path
"""
bin_folder = output_folder.joinpath("bin")
bin_folder.mkdir()
shutil.copy(this_dir.joinpath("../wfbench.py"), bin_folder)
shutil.copy(shutil.which("cpu-benchmark"), bin_folder)

def _generate_input_files(self, output_folder: pathlib.Path) -> None:
"""
Generate workflow input files into workflow benchmark's data folder.
:param output_folder: The path to the folder in which the workflow benchmark will be generated.
:type output_folder: pathlib.Path
"""
generated_files = []
data_folder = output_folder.joinpath("data")
data_folder.mkdir()
for task_name in self.root_task_names:
task = self.tasks[task_name]
for file in task.files:
if file.name not in generated_files and file.link == FileLink.INPUT:
generated_files.append(file.name)
with open(data_folder.joinpath(file.name), "wb") as fp:
fp.write(os.urandom(int(file.size)))

def _write_output_file(self, contents: str, output_file_path: pathlib.Path) -> None:
"""
Expand All @@ -91,7 +125,7 @@ def _write_output_file(self, contents: str, output_file_path: pathlib.Path) -> N
out.write(contents)
self.logger.info(f"Translated content written to '{output_file_path}'")

def _find_children(self, task_name: str) -> List[Task]:
def _find_children(self, task_name: str) -> list[Task]:
"""
Find the children for a specific task.
Expand All @@ -108,7 +142,7 @@ def _find_children(self, task_name: str) -> List[Task]:

return children

def _find_parents(self, task_name: str) -> List[Task]:
def _find_parents(self, task_name: str) -> list[Task]:
"""
Find the parents for a specific task.
Expand Down
44 changes: 34 additions & 10 deletions wfcommons/wfbench/translator/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,14 @@ def __init__(self,
self.tasks_futures = {}
self.task_id = 0

def translate(self, output_file_name: pathlib.Path) -> None:
def translate(self, output_folder: pathlib.Path) -> None:
"""
Translate a workflow benchmark description (WfFormat) into a Dask workflow application.
Translate a workflow benchmark description (WfFormat) into an actual workflow application.
:param output_file_name: The name of the output file (e.g., workflow.py).
:type output_file_name: pathlib.Path
:param output_folder: The path to the folder in which the workflow benchmark will be generated.
:type output_folder: pathlib.Path
"""
noindent_python_codelines = self._dask_wftasks_codelines("randomizer")
noindent_python_codelines = self._dask_wftasks_codelines("randomizer", output_folder)

for task_name in self.root_task_names:
noindent_python_codelines.extend(self._parse_tasks(task_name))
Expand All @@ -61,30 +61,53 @@ def translate(self, output_file_name: pathlib.Path) -> None:
with open(this_dir.joinpath("templates/dask_template.py")) as fp:
run_workflow_code = fp.read()
run_workflow_code = run_workflow_code.replace("# Generated code goes here", wf_codelines)
with open("dask_workflow.py", "w") as fp:

# write benchmark files
output_folder.mkdir(parents=True)
with open(output_folder.joinpath("dask_workflow.py"), "w") as fp:
fp.write(run_workflow_code)

# additional files
self._copy_binary_files(output_folder)
self._generate_input_files(output_folder)

def _dask_wftasks_codelines(self,
randomizer_varname: str,
output_folder: pathlib.Path,
simulate_minimum_execution_time: float = 0.1,
simulate_maximum_execution_time: float = 1.1) -> list[str]:
"""
Build the code definining all tasks in the workflow, i.e. WorkflowTask instances.
:param randomizer_varname: The name of the randomizer.
:type randomizer_varname: str
:param output_folder: The path to the folder in which the workflow benchmark will be generated.
:type output_folder: pathlib.Path
:return: The non-indented Python lines of code used to instantiate the WorkflowTask instances.
:rtype: list[str]
"""
codelines = ["randomizer = random.Random(seed)",
"TASKS = {}"]
for task in self.tasks.values():
input_files = [f.name for f in task.files if f.link == FileLink.INPUT]
output_files = [f.name for f in task.files if f.link == FileLink.OUTPUT]
input_files = [str(output_folder.joinpath(f"data/{f.name}")) for f in task.files if f.link == FileLink.INPUT]
output_files = [str(output_folder.joinpath(f"data/{f.name}")) for f in task.files if f.link == FileLink.OUTPUT]
program = output_folder.joinpath(f'bin/{task.program}')
args = []
print(task.args)
for a in task.args:
if "--out" in a:
a = a.replace("{", "\"{").replace("}", "}\"").replace(".txt'", ".txt\\\\\"").replace("'", "\\\\\"" + str(output_folder.joinpath("data")) + "/").replace(": ", ":")
elif "--" not in a:
a = str(output_folder.joinpath("data", a))
else:
a = a.replace("'", "\"")
args.append(a)
print(args)
print("")
code = [f"WorkflowTask(dag_id = '{task.name}',",
f" name = '{task.name}',",
f" command_arguments = {[task.program] + task.args},",
f" command_arguments = {[str(program)] + args},",
f" inputs = {input_files},",
f" outputs = {output_files},",
" simulate = simulate,",
Expand All @@ -94,6 +117,7 @@ def _dask_wftasks_codelines(self,
" )"]
codelines.append(f"TASKS['{task.name}'] = {code[0]}")
codelines.extend([codeline for codeline in code[1:]])
# exit(1)
return codelines

def _parse_tasks(self, task_name: str) -> list[str]:
Expand All @@ -115,7 +139,7 @@ def _parse_tasks(self, task_name: str) -> list[str]:
self.parsed_tasks.append(task_name)
self.tasks_futures[task_name] = f"fut_dv_{self.task_id}"
self.task_id += 1
noindent_python_codelines = [f"{self.tasks_futures[task_name]} = client.submit(execute_task, TASKS['{task_name}'], [])"]
noindent_python_codelines = [f"{self.tasks_futures[task_name]} = client.submit(execute_task, TASKS['{task_name}'], {self.task_parents[task_name]})"]

# parse children
for child in self.task_children[task_name]:
Expand Down
18 changes: 9 additions & 9 deletions wfcommons/wfbench/wfbench.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,9 @@ def cpu_mem_benchmark(cpu_threads: Optional[int] = 5,
os.sched_setaffinity(cpu_proc.pid, {core})
cpu_procs.append(cpu_proc)

mem_proc = subprocess.Popen(mem_prog)
if core:
os.sched_setaffinity(mem_proc.pid, {core})
# mem_proc = subprocess.Popen(mem_prog)
# if core:
# os.sched_setaffinity(mem_proc.pid, {core})

return cpu_procs

Expand Down Expand Up @@ -148,19 +148,19 @@ def get_parser() -> argparse.ArgumentParser:
return parser


def io_read_benchmark_user_input_data_size(other):
def io_read_benchmark_user_input_data_size(inputs):
print("[WfBench] Starting IO Read Benchmark...")
for file in other:
with open(this_dir.joinpath(file), "rb") as fp:
for file in inputs:
with open(file, "rb") as fp:
print(f"[WfBench] Reading '{file}'")
fp.readlines()
print("[WfBench] Completed IO Read Benchmark!\n")


def io_write_benchmark_user_input_data_size(outputs):
for task_name, file_size in outputs.items():
print(f"[WfBench] Writing output file '{task_name}'\n")
with open(this_dir.joinpath(task_name), "wb") as fp:
for file_name, file_size in outputs.items():
print(f"[WfBench] Writing output file '{file_name}'\n")
with open(file_name, "wb") as fp:
fp.write(os.urandom(int(file_size)))


Expand Down

0 comments on commit 1d9843a

Please sign in to comment.