In [1]:
%load_ext autoreload
%autoreload 2
from expressiveness_benchmark.types import Program, Task
import pandas as pd
from dataclasses import replace

In [2]:
# CHANGE ME!
TASK_ID = 'average_adjacent'
AUTHOR = 'scott'

In [3]:
task = Task(
    id=TASK_ID,
    name="Adjacent averages",
    description="compute averages of adjacent values in time-ordered data",
    category='Time Series',
    plan=[{
        "id": "order",
        "description": "time-ordered data",
    }, {
        "id": "pair adjacent",
        "description": "adjacent values",
    }, {
        "id": "average",
        "description": "compute averages",
    }],
    sample_input={
        "data": [
            {"time": 6, "x": 14.0},
            {"time": 2, "x": 3.0},
            {"time": 1, "x": 1.0},
            {"time": 3, "x": 7.0},
            {"time": 4, "x": 9.0},
            {"time": 5, "x": 11.0},
        ]
    },
    sample_output=[
        {"time": 1, "x": 2.0},
        {"time": 2, "x": 5.0},
        {"time": 3, "x": 8.0},
        {"time": 4, "x": 10.0},
        {"time": 5, "x": 12.5}
    ]
)
task.save()

prototype = Program(
    task=TASK_ID,
    author=AUTHOR,
    language=''    
)

In [4]:
r = replace(prototype,
    language='r',
    source='''average_adjacent <- function(data) {
    
}''').load_plan()
r.execute(task)
r.save()

R[write to console]: ── [1mAttaching packages[22m ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──

R[write to console]: [32m✔[39m [34mggplot2[39m 3.3.2     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.0.4     [32m✔[39m [34mdplyr  [39m 1.0.2
[32m✔[39m [34mtidyr  [39m 1.1.2     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.4.0     [32m✔[39m [34mforcats[39m 0.5.0

R[write to console]: ── [1mConflicts[22m ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



Mismatch between target and actual output.
Target:     time     x
0   1.0   2.0
1   2.0   5.0
2   3.0   8.0
3   4.0  10.0
4   5.0  12.5
Actual:                                                     0
0  <rpy2.rinterface_lib.sexp.NULLType object at 0...


KeyError: 'time'

In [393]:
python_imp = replace(prototype,
    language='python-imperative',
    implementation='',
    source='''def average_adjacent(data):
  if len(data) == 0:
    return []
  if len(data) == 1:
    return [data[0]["x"]]
  data.sort(key=lambda v: v["time"])
  previous, current = None, data[0]["x"]
  time = data[0]["time"]
  result = []
  for value in data[1:]:
    previous = current; current = value["x"]
    result.append({"time": time, "x": (previous + current)/2})
    time = value["time"]
  return result''').load_plan()
python_imp.execute(task)
python_imp.save()

In [428]:
python_fun = replace(prototype,
    language='python-functional',
    implementation='',
    source='''def average_adjacent(data):
  data = sorted(data, key=lambda v: v["time"])
  return [
    {"time": v["time"],
     "x": (v["x"] + next["x"]) / 2}
    for next, v in zip(data[1:], data[:-1])
  ]''').load_plan()
python_fun.execute(task)
python_fun.save()

In [397]:
pandas = replace(prototype,
    language='python-pandas',
    implementation='',
    source='''def average_adjacent(data):
  data = data.sort_values("time")
  lagging = data[1:].reset_index(drop=True)
  leading = data[:-1].reset_index(drop=True)
  leading.x = (lagging.x + leading.x)/2
  return leading''').load_plan()
pandas.execute(task)
pandas.save()

In [406]:
datalog = replace(prototype,
    language='datalog',
    source='''.decl less(x: number, y: number)
less(x, y) :- data(x, _), data(y, _), x < y.
.decl intermediate(x: number, y: number)
intermediate(x, y) :- less(x, z), less(z, y).
.decl immediate(x: number, y: number)
immediate(x, y) :- less(x, y), !intermediate(x, y).
average_adjacent(t1, v) :-
  immediate(t1, t2),
  data(t1, v1), data(t2, v2),
  v = (v1+v2)/2.''').load_plan()
datalog.execute(task, debug=False)
datalog.save()

In [414]:
sql = replace(prototype,
    language='sql',
    source='''SELECT
  time, (x + next) / 2 as x
FROM (SELECT
  time, x, LEAD(x, 1) OVER (ORDER BY time) as next
  FROM data)
WHERE next is not null
ORDER BY time''').load_plan()
sql.execute(task)
sql.save()

In [425]:
pandas.widget(task)

Output()

CodeWidget(program='{"task": "average_adjacent", "language": "python-pandas", "plan": {"average": [{"line": 4,…

In [401]:
python_imp.widget(task)

Output()

CodeWidget(program='{"task": "average_adjacent", "language": "python-imperative", "plan": {"order": [{"line": …

In [429]:
python_fun.widget(task)

Output()

CodeWidget(program='{"task": "average_adjacent", "language": "python-functional", "plan": {"order": [{"line": …

In [430]:
datalog.widget(task)

Output()

CodeWidget(program='{"task": "average_adjacent", "language": "datalog", "plan": {"order": [{"line": 0, "start"…

In [431]:
sql.widget(task)

Output()

CodeWidget(program='{"task": "average_adjacent", "language": "sql", "plan": {"order": [{"line": 3, "start": 28…