Skip to content

Commit

Permalink
Add performance measurement scripts.
Browse files Browse the repository at this point in the history
  • Loading branch information
ynikitenko committed Nov 14, 2021
1 parent f0b9c5b commit bb1770a
Show file tree
Hide file tree
Showing 8 changed files with 266 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@

# output and executables
docs/examples/tutorial/*/output
# add new files from there manually
docs/examples/performance/*

# Sphinx
docs/build
Expand Down
16 changes: 16 additions & 0 deletions docs/examples/performance/compute_perf.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Append one benchmark record to performance.txt: a header line with the
# current commit hash and the output of `python -V`, followed by the
# /bin/time measurements of the two example scripts.
{
echo \# Lena commit: `git log --pretty=format:'%h' -n 1`, `python -V`
# It's meaningful to not produce plots (because we don't measure pdflatex performance).
# But it's safe to "produce" them in code if they exist: they won't be reproduced!
# Disk cache seems not relevant, because the times don't change much between runs
# (and time of pure read is very very small)
#
# The redirection order matters: "2>&1" first points stderr at the group's
# stdout (which is appended to performance.txt), and only then ">/dev/null"
# discards the scripts' own stdout. This keeps whatever is written to stderr
# (presumably including the report printed by time) in the file.
echo \# one histogram:
/bin/time python lena_xs.py 2>&1 >/dev/null
echo \# Split, two histograms:
/bin/time python lena_xy.py 2>&1 >/dev/null
} >> performance.txt
# feel free to add meaningful comments to performance.txt!
#
# anonymous function trick taken from https://stackoverflow.com/a/315113/952234
# help with redirection from https://stackoverflow.com/a/549776/952234
2 changes: 2 additions & 0 deletions docs/examples/performance/gen_data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Run the tutorial's data generator with --large from its own directory.
# NOTE(review): presumably this creates ../tutorial/data/normal_3d_large.csv,
# the file read by the performance scripts - confirm in generate_normal.py.
cd ../tutorial/generate_data && python generate_normal.py --large
# about 3 seconds
19 changes: 19 additions & 0 deletions docs/examples/performance/histogram_1d.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
% Template of a standalone pgfplots picture for a 1D histogram.
% The table is read from the comma-separated file at output.filepath and
% drawn as a const plot. The x label is written only when the context
% variable has a latex_name; its unit, if present, is added in brackets.
\documentclass{standalone}
\usepackage{tikz}
\usepackage{pgfplots}
\pgfplotsset{compat=1.18}

\begin{document}
\BLOCK{ set var = variable if variable else '' }
\begin{tikzpicture}
\begin{axis}[
\BLOCK{ if var.latex_name }
xlabel = {$\VAR{ var.latex_name }$ \BLOCK{ if var.unit }[$\mathrm{\VAR{ var.unit }}$]\BLOCK{ endif }},
\BLOCK{ endif }
]
\addplot [
const plot,
] table [col sep=comma, header=false] {\VAR{ output.filepath }};
\end{axis}
\end{tikzpicture}
\end{document}
14 changes: 14 additions & 0 deletions docs/examples/performance/histogram_1d_simple.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
% Minimal template of a standalone pgfplots picture for a 1D histogram:
% the comma-separated table at output.filepath is drawn as a const plot,
% with no axis options or labels.
\documentclass{standalone}
\usepackage{tikz}
\usepackage{pgfplots}
\pgfplotsset{compat=1.18}

\begin{document}
\begin{tikzpicture}
\begin{axis}[]
\addplot [
const plot,
] table [col sep=comma, header=false] {\VAR{ output.filepath }};
\end{axis}
\end{tikzpicture}
\end{document}
85 changes: 85 additions & 0 deletions docs/examples/performance/lena_xs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import os
import sys

import lena.math
from lena.core import Sequence, Source
from lena.flow import Print, Cache, Slice
from lena.context import Context, UpdateContext
from lena.output import Write, ToCSV, RenderLaTeX, LaTeXToPDF, PDFToPNG
from lena.structures import Histogram


data_path = os.path.join("..", "tutorial", "data")
data_file = os.path.join(data_path, "normal_3d_large.csv")


def get_filenames():
    """Yield the paths of the input files (currently only ``data_file``)."""
    for path in (data_file,):
        yield path


class GetCoordinates():
    """Turn a flow of CSV file names into a flow of coordinate lists."""

    def __init__(self):
        """Create the element; there is nothing to configure."""

    def run(self, flow):
        """Yield one list of floats for each line of each file in *flow*.

        Every value taken from *flow* is opened as a text file. Each of
        its lines is split on commas and every field is converted
        with float().
        """
        for path in flow:
            with open(path) as csv_file:
                for row in csv_file:
                    fields = row.split(',')
                    yield [float(field) for field in fields]

## note that there is no coupling between these functions


def main():
    """Assemble the one-histogram sequence and run it on the input files.

    The sequence reads coordinates, keeps the first one (x), fills a
    histogram on a 100-bin mesh over (-10, 10), names the output "x" and
    passes it through the CSV / LaTeX / PDF / PNG output elements.
    Returns whatever ``Sequence.run`` returns for the file names.
    """
    # one Write element is shared by the CSV and the LaTeX stages
    writer = Write("output")

    pipeline = Sequence(
        GetCoordinates(),
        lambda coord: coord[0], # x
        Histogram(lena.math.mesh((-10, 10), 100)),
        UpdateContext("output.filename", "x"),
        # output
        ToCSV(),
        writer,
        RenderLaTeX("histogram_1d_simple.tex"),
        writer,
        LaTeXToPDF(),
        PDFToPNG(),
    )

    return pipeline.run(get_filenames())
# return s()
# /bin/time with already produced plot:
# 3.26user 0.00system 0:03.27elapsed 99%CPU (0avgtext+0avgdata 18000maxresident)k
# 0inputs+0outputs (0major+2735minor)pagefaults 0swaps
# PyPy:
# 1.11user 0.03system 0:01.21elapsed 93%CPU (0avgtext+0avgdata 92996maxresident)k
# 12168inputs+736outputs (59major+12457minor)pagefaults 0swaps


def read_data(file_):
    """Parse all lines of *file_* into floats and throw the values away.

    Used only for performance measurement: it isolates the cost of
    reading the file, splitting on commas and calling float().
    """
    with open(file_) as source:
        for row in source:
            # the parsed values are intentionally not kept
            _ = [float(value) for value in row.split(',')]


if __name__ == "__main__":
    # "read_data" on the command line selects the bare reading benchmark:
    # the input is only read and parsed, no histogram is built.
    only_read = "read_data" in sys.argv
    if only_read:
        read_data(data_file)
        sys.exit(0)
        # without split and float():
        # Python results (PyPy slower):
        # 0.13user 0.01system 0:00.14elapsed 99%CPU (0avgtext+0avgdata 17840maxresident)k
        # 0inputs+0outputs (0major+2709minor)pagefaults 0swaps
        # with split and float():
        # Python results (PyPy similar):
        # 0.81user 0.02system 0:00.84elapsed 99%CPU (0avgtext+0avgdata 18084maxresident)k
        # 0inputs+0outputs (0major+2713minor)pagefaults 0swaps

    # default mode: run the full sequence and print each of its results
    for item in main():
        print(item)
56 changes: 56 additions & 0 deletions docs/examples/performance/lena_xy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import os
import sys

import lena.math
from lena.core import Sequence, Source, Split
from lena.variables import Variable, Combine
from lena.flow import Print, Cache, Slice
from lena.context import Context, UpdateContext
from lena.output import (
Write, ToCSV, RenderLaTeX, LaTeXToPDF, PDFToPNG, MakeFilename
)
from lena.structures import Histogram

from lena_xs import get_filenames, GetCoordinates, data_file


def main():
    """Assemble the two-histogram source and return the result of calling it.

    Coordinates read from the input files are split into two branches,
    one for x (``coord[0]``) and one for y (``coord[1]``). Each branch
    fills its own histogram on a 100-bin mesh over (-10, 10). File names
    are made from the variable name, then the results go through the
    CSV / LaTeX / PDF / PNG output elements.
    """
    # one Write element is shared by the CSV and the LaTeX stages
    writer = Write("output")

    x_branch = (
        Variable("x", lambda coord: coord[0]),
        Histogram(lena.math.mesh((-10, 10), 100)),
    )
    y_branch = (
        Variable("y", lambda coord: coord[1],
                 latex_name="y", unit="mm"),
        Histogram(lena.math.mesh((-10, 10), 100)),
    )

    source = Source(
        get_filenames,
        GetCoordinates(),
        Split([x_branch, y_branch]),
        MakeFilename("{{variable.name}}"),
        # UpdateContext("output.filename", "x"),
        ToCSV(),
        writer,
        RenderLaTeX("histogram_1d.tex"),
        writer,
        LaTeXToPDF(),
        PDFToPNG(),
    )

    return source()


if __name__ == "__main__":
    # run the two-histogram source and print each of its results
    for item in main():
        print(item)

# Python:
# 17.09user 0.00system 0:17.10elapsed 99%CPU (0avgtext+0avgdata 18540maxresident)k
# 0inputs+0outputs (0major+2851minor)pagefaults 0swaps
# PyPy:
# 6.34user 0.06system 0:06.45elapsed 99%CPU (0avgtext+0avgdata 93756maxresident)k
# 0inputs+0outputs (0major+12357minor)pagefaults 0swaps
72 changes: 72 additions & 0 deletions docs/examples/performance/numpy_xs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import os
import numpy
from matplotlib import pyplot as plt


def plot_data(use_np_load=False):
    """Histogram the first column of the large data file with matplotlib.

    The file ``../tutorial/data/normal_3d_large.csv`` is read and its
    first column is passed to ``plt.hist`` with 100 bins on the range
    [-10, 10]. The figure is not saved (savefig is commented out).

    Parameters
    ----------
    use_np_load : bool, optional
        If true, read the column with ``numpy.loadtxt``; otherwise
        (default, as before) read it line by line in pure Python.
        Exposed as a parameter so that both variants can be measured
        without editing the source.
    """
    data_path = os.path.join("..", "tutorial", "data")
    data_file = os.path.join(data_path, "normal_3d_large.csv")

    if use_np_load:
        # The recommended way of plotting data from a file is ...
        # numpy.loadtxt or pandas.read_csv to read the data.
        # These are more powerful and faster.
        # https://matplotlib.org/3.2.2/gallery/misc/plotfile_demo_sgskip.html
        data = numpy.loadtxt(data_file, delimiter=',', usecols=0)
        # /bin/time, without savefig
        # 2.94user 0.30system 0:03.00elapsed 107%CPU (0avgtext+0avgdata 117196maxresident)k
        # 0inputs+0outputs (0major+19994minor)pagefaults 0swaps
    else:
        filenames = [data_file]
        data = []
        for filename in filenames:
            with open(filename) as fil:
                for line in fil:
                    data.append(float(line.split(',')[0]))
        # /bin/time, without savefig
        # 3.11user 0.33system 0:03.17elapsed 108%CPU (0avgtext+0avgdata 437396maxresident)k
        # 0inputs+0outputs (0major+97214minor)pagefaults 0swaps

    plt.hist(data, bins=100, range=[-10, 10])
    # plt.savefig(os.path.join("output", "pyplot_xs.png"))


if __name__ == "__main__":
    # build the histogram only when the file is executed as a script
    plot_data()


## Educational notes. Examples were not tested and are not used.

# "Top-down"

def td_read_files(filenames, data):
    """Append to *data* the value of td_read_line for every line of every file.

    *data* is modified in place; nothing is returned.
    """
    for path in filenames:
        with open(path) as source:
            data.extend(td_read_line(row) for row in source)


def td_read_line(line):
    """Return the x column: the text before the first comma, as a float."""
    first_field, _, _ = line.partition(',')
    return float(first_field)


# "Bottom-up"

def get_filenames():
    """Yield the file names to read.

    The collection of names is an empty placeholder here, so the
    generator yields nothing.
    """
    placeholder = ()
    for name in placeholder:
        yield name

def bu_read_lines():
    """Return a list with the first column, as float, of every line read.

    The files are those produced by get_filenames().
    """
    # or get data as an argument
    values = []
    for path in get_filenames():
        with open(path) as source:
            values.extend(float(row.split(',')[0]) for row in source)
    return values


## note the coupling between these functions

0 comments on commit bb1770a

Please sign in to comment.