In [2]:
%load_ext autoreload
%autoreload 2
from expressiveness_benchmark.types import Program, Task
import pandas as pd
from dataclasses import replace

In [1]:
# CHANGE ME! Per-notebook settings: edit these two values when adapting this
# notebook to another task or author.
TASK_ID = "time_series"  # id given to the Task and to every Program built below
AUTHOR = "scott"  # author recorded on every Program built below

In [170]:
# Task definition: average `x` per time window and report the averages in time order.
# The sample rows are listed out of time order; the expected output is consistent with
# the 7-unit windows the solution cells use (times 1,3 -> 1.5; 9,10,11 -> 5.0; 20 -> 7.0).
task = Task(
    id=TASK_ID,
    description="compute windowed averages ordered by time",
    plan=[],
    sample_input={
        "data": [
            {"time": time, "x": x}
            for time, x in [(20, 7.0), (3, 1.0), (1, 2.0), (9, 4.0), (10, 5.0), (11, 6.0)]
        ]
    },
    sample_output=[1.5, 5.0, 7.0],
)
task.save()

# Template Program carrying the shared task id and author; each solution cell copies it
# with dataclasses.replace and fills in its own language and source.
prototype = Program(task=TASK_ID, author=AUTHOR, language='')

In [159]:
# pandas solution. The embedded program builds a DataFrame from `data`, sorts it by
# "time", makes "time" the index, and groups by floor(time / 7): passing a callable to
# groupby applies it to each index label. It returns the per-window mean of "x" as a list.
# Note: the name `pandas` here is the Program object, not the library (imported as `pd`).
# NOTE(review): load_plan/execute/save come from expressiveness_benchmark.types; presumably
# they attach the plan, run the program on the task's sample, and persist it — confirm there.
pandas = replace(prototype,
    language='python-pandas',
    implementation='',
    source='''import pandas as pd
import math
def time_series(data):
    def window(t):
        return math.floor(t / 7)
    result = pd.DataFrame(data).sort_values("time").set_index("time").groupby(window).mean()
    return result['x'].tolist()
''').load_plan()
pandas.execute(task)
pandas.save()

In [160]:
# Imperative pure-Python solution. The embedded program returns [] for empty input;
# otherwise it sorts the rows by "time" and makes a single pass, keeping a running
# total and count for the current window (floor(time / 7)). Whenever the window
# changes it emits the finished window's average and resets the accumulators; the
# last window is emitted after the loop.
# The embedded program imports only `math`: it never used pandas, so the stray
# `import pandas as pd` was dropped to keep this entry dependency-free.
# NOTE(review): load_plan/execute/save come from expressiveness_benchmark.types; presumably
# they attach the plan, run the program on the task's sample, and persist it — confirm there.
python_imp = replace(prototype,
    language='python-imperative',
    implementation='',
    source='''import math
def time_series(data):
    def window(value):
        return math.floor(value["time"] / 7)
    if len(data) < 1:
        return []
    data = sorted(data, key=lambda v: v["time"])
    result = []
    current_window, total, count = window(data[0]), data[0]["x"], 1
    for value in data[1:]:
        time_window = window(value)
        if time_window != current_window:
            result.append(total / count)
            current_window, total, count = time_window, 0, 0
        total += value["x"]
        count += 1
    result.append(total / count)
    return result
''').load_plan()
python_imp.execute(task)
python_imp.save()

In [168]:
# Functional pure-Python solution. The embedded program sorts the rows by "time" first
# because itertools.groupby only merges consecutive items with equal keys; it then groups
# by floor(time / 7) and returns the mean of "x" for each group, in group order.
# NOTE(review): load_plan/execute/save come from expressiveness_benchmark.types; presumably
# they attach the plan, run the program on the task's sample, and persist it — confirm there.
python_fun = replace(prototype,
    language='python-functional',
    implementation='',
    source='''import math
from itertools import groupby
def time_series(data):
    def window(value):
        return math.floor(value["time"] / 7)
    data = sorted(data, key=lambda d: d["time"])
    groups = [list(values) for _, values in groupby(data, key=window)]
    return [sum([v["x"] for v in values]) / len(values) for values in groups]
''').load_plan()
python_fun.execute(task)
python_fun.save()

In [169]:
# Datalog solution. The embedded program derives:
#   window(w)            - every window id t/7 that occurs in data
#   windowed(w, x)       - each x tagged with its window id
#   windowed_total(w, a) - per-window average, computed as sum(x) / sum(1.0); the float
#                          1.0 sum stands in for count, whose `number` result could not
#                          be used as the divisor (see the comment inside the program)
#   time_series(v)       - the averages, with the window id projected away
# The `data` and `time_series` relations are not declared here; presumably the benchmark
# harness declares them — confirm.
# NOTE(review): windowed(w, x) does not keep the time column. If the engine stores
# relations as deduplicated sets, two readings with the same x in one window would count
# once and skew the average; likewise equal averages would merge in time_series — confirm.
datalog = replace(prototype,
    language='datalog',
    source='''
.decl window(w: number)
.decl windowed(w: number, x: float)
.decl windowed_total(w: number, x: float)
window(t/7) :- data(t, _).
windowed(t/7, x) :- data(t, x).
// division by result of count (has type `number`) didn't work
windowed_total(w, total/(n)) :- window(w), total = sum x : { windowed(w, x) },
                              n = sum z : { windowed(w,x), z=1.0 }.
time_series(v) :- windowed_total(_, v).
''').load_plan()
datalog.execute(task)
datalog.save()

In [163]:
# SQL solution. The embedded query groups rows by the window id cast(time / 7 as int)
# and returns AVG(x) per window, ordered by time.
# The ordering uses MIN(time) rather than the bare `time` column: `time` is neither
# grouped nor aggregated, so `ORDER BY time` is invalid in standard SQL and only works
# where the engine picks an arbitrary row per group (e.g. SQLite). Windows cover
# disjoint time ranges, so ordering by each window's earliest time gives window order.
# NOTE(review): load_plan/execute/save come from expressiveness_benchmark.types; presumably
# they attach the plan, run the program on the task's sample, and persist it — confirm there.
sql = replace(prototype,
    language='sql',
    source='''SELECT AVG(x) as x
FROM data
GROUP BY cast(time / 7 as int)
ORDER BY MIN(time)
''').load_plan()
sql.execute(task)
sql.save()

In [111]:
# Render the widget for one solution against the task; rebind `program` to another
# Program defined above (pandas, python_imp, python_fun, datalog) to inspect it instead.
program = sql
program.widget(task)

Output()

CodeWidget(program='{"task": "time_series", "language": "sql", "plan": {}, "source": " \\n    \\nSELECT AVG(x)…