In [1]:
%load_ext autoreload
%autoreload 2
from expressiveness_benchmark.types import Plan, Task, Language, SourceRange, Program
from code_widget.example import CodeWidget
from dataclasses import replace
import json
import pandas as pd

In [2]:
# CHANGE ME! Per-notebook settings; edit both values for your task/author.
# TASK_ID becomes the Task's id and the `task` field of every Program below.
TASK_ID = 'continent_median_population'
# AUTHOR becomes the `author` field of every Program below.
AUTHOR = 'will'

In [3]:
# Task definition: group countries by continent and report each continent's
# median population. The task is saved as soon as it is built.

# Each plan step's description is a fragment of the task description.
plan_steps = [
    Plan(id=step_id, description=fragment)
    for step_id, fragment in [
        ("iter", "For each continent"),
        ("name", "its name"),
        ("group", "of its countries"),
        ("agg", "median population"),
    ]
]

# North America has three rows, two of them tied at 37.0, so its median is
# 37.0. (Without the "USA2" row the expected value would be 182.5, the
# midpoint of 328.0 and 37.0.) Africa has a single row.
sample_countries = [
    {"name": "USA", "population": 328.0, "continent": "North America"},
    {"name": "USA2", "population": 37.0, "continent": "North America"},
    {"name": "Canada", "population": 37.0, "continent": "North America"},
    {"name": "Ethiopia", "population": 109.0, "continent": "Africa"},
]

expected_medians = [
    {"continent": "North America", "population": 37.0},
    {"continent": "Africa", "population": 109.0},
]

task = Task(
    id=TASK_ID,
    category="Aggregation",
    name="Median population for each continent",
    description="For each continent, return its name and the median population of its countries.",
    plan=plan_steps,
    sample_input={"countries": sample_countries},
    sample_output=expected_medians,
)
task.save()


# Template Program shared by every implementation cell below: each cell copies
# it with dataclasses.replace() and fills in its own `language` and `source`.
prototype = Program(task=TASK_ID, author=AUTHOR, language='')

In [8]:
# q implementation: `med[population] by continent` computes the grouped median;
# `() xkey` is applied to the grouped result.
q_source = '''continent_median_population:
  () xkey select med[population] by continent from countries'''

q = replace(prototype, language='q', source=q_source).load_plan()
q.execute(task)
q.save()

In [9]:
# Display the widget for the q program on this task (the recorded output is a CodeWidget).
q.widget(task)

Output()

CodeWidget(program='{"task": "continent_median_population", "language": "q", "plan": {}, "source": "continent_…

In [4]:
# R implementation: group by continent, then summarize each group with the
# median of its `population` column.
r_source = '''continent_median_population <- function(countries) {
  countries %>%
    group_by(continent) %>%
    summarize(population = median(population))
}'''

r = replace(prototype, language='r', source=r_source).load_plan()
r.execute(task)
r.save()

R[write to console]: ── [1mAttaching packages[22m ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──

R[write to console]: [32m✔[39m [34mggplot2[39m 3.3.2     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.0.4     [32m✔[39m [34mdplyr  [39m 1.0.2
[32m✔[39m [34mtidyr  [39m 1.1.2     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.4.0     [32m✔[39m [34mforcats[39m 0.5.0

R[write to console]: ── [1mConflicts[22m ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



`summarise()` ungrouping output (override with `.groups` argument)


In [6]:
# Display the widget for the R program on this task (the recorded output is a CodeWidget).
r.widget(task)

Output()

CodeWidget(program='{"task": "continent_median_population", "language": "r", "plan": {}, "source": "continent_…

In [19]:
# SQL implementation. The inner query numbers the rows of each continent by
# ascending population (`rank`) and attaches the per-continent row count
# (`count`). The WHERE clause keeps the single middle row when the count is
# odd, or the two middle rows when it is even; AVG then yields the median.
sql_source = '''SELECT continent, AVG(population) as population
FROM
  (SELECT *, 
    row_number() OVER 
      (PARTITION BY continent ORDER BY population) AS rank, 
    count() OVER 
      (PARTITION BY continent) as count
  FROM countries)
WHERE 
  (count % 2 = 1 AND rank = (count + 1) / 2) OR 
  (count % 2 = 0 AND ABS(rank - 0.5 - count / 2) = 0.5)
GROUP BY continent'''

sql = replace(prototype, language='sql', source=sql_source).load_plan()
sql.execute(task)
sql.save()

In [20]:
# Display the widget for the SQL program on this task (the recorded output is a CodeWidget).
sql.widget(task)

Output()

CodeWidget(program='{"task": "continent_median_population", "language": "sql", "plan": {"name": [{"line": 0, "…

In [28]:
# Datalog implementation.
#   * unique_id gives every country a distinct number, used only to break ties.
#   * rank(Continent, R, Population) gives each country a 0-based position R
#     within its continent: the number of countries with a strictly smaller
#     population, plus the number with an equal population and a smaller id.
#   * continent_median_population picks the middle rank (odd count) or averages
#     the two middle ranks (even count).
#
# .load_plan() is called here like in every other program cell of this
# notebook, so this cell goes through the same load-then-save sequence.
datalog = replace(prototype,
    language='datalog',
    source='''.decl unique_id(Country:symbol, Id:number)    
unique_id(Country, $) :- countries(_, Country, _).

.decl rank(Continent:symbol, R:number, Population:float)
rank(Continent, R, Population) :-
  countries(Continent, Country, Population),
  unique_id(Country, Id),
  R_less = count : { 
    countries(Continent, C, P2), 
    P2 < Population
  },
  R_eq = count : {
    countries(Continent, C, Population), 
    unique_id(C, Id2),
    Id2 < Id
  },
  R = R_less + R_eq.

continent_median_population(Continent, Median) :-
  countries(Continent, _, _),
  Num_countries = count : countries(Continent, _, _),
  ((Num_countries % 2 = 1, 
    rank(Continent, (Num_countries - 1) / 2, Median));
   (Num_countries % 2 = 0,
    rank(Continent, Num_countries / 2 - 1, P1),
    rank(Continent, Num_countries / 2, P2),
    Median = (P1 + P2) / 2)).''').load_plan()
# NOTE(review): unlike the other cells, the execution check is disabled here and
# the reason is not visible in this notebook -- TODO confirm and re-enable.
#datalog.execute(task, debug=True)
datalog.save()

In [21]:
# Display the widget for the Datalog program on this task (the recorded output is a CodeWidget).
datalog.widget(task)

Output()

CodeWidget(program='{"task": "continent_median_population", "language": "datalog", "plan": {}, "source": ".dec…

In [4]:
# pandas implementation: group by continent, take the median of `population`,
# then reset_index() on the grouped result.
# Note: the variable is named `pandas`; the library itself is imported as `pd`.
pandas_source = '''def continent_median_population(countries):
  return (countries
      .groupby('continent')
      .population.median()
      .reset_index())'''

pandas = replace(prototype, language='python-pandas', source=pandas_source).load_plan()
pandas.execute(task)
pandas.save()

In [7]:
# Display the widget for the pandas program on this task (the recorded output is a CodeWidget).
pandas.widget(task)

Output()

CodeWidget(program='{"task": "continent_median_population", "language": "python-pandas", "plan": {"name": [{"l…

In [8]:
# Imperative Python implementation: bucket populations per continent, sort
# each bucket, and take the middle element (odd length) or the mean of the two
# middle elements (even length).
# NOTE(review): the embedded source uses `defaultdict` without importing it;
# presumably the execution harness supplies it -- confirm.
imperative_source = '''def continent_median_population(countries):
  populations = defaultdict(list)
  for country in countries:
    populations[country['continent']].append(country['population'])
  
  output = []  
  for continent, pops in populations.items():
    pops.sort()
    N = len(pops)
    if N % 2 == 1:
      median = pops[(N - 1) // 2]
    else:
      median = (pops[N // 2 - 1] + pops[N // 2]) / 2
    output.append({
      "continent": continent,
      "population": median
    })
    
  return output'''

imperative = replace(
    prototype,
    language='python-imperative',
    source=imperative_source,
).load_plan()
imperative.execute(task)
imperative.save()

In [9]:
# Display the widget for the imperative Python program on this task (the recorded output is a CodeWidget).
imperative.widget(task)

Output()

CodeWidget(program='{"task": "continent_median_population", "language": "python-imperative", "plan": {}, "sour…

In [30]:
# Functional Python implementation: build a {continent: [populations]} mapping
# with comprehensions, then map a local compute_median() helper over it.
functional_source = '''def continent_median_population(countries):
  continents = set([c['continent'] for c in countries])
  populations = {
    continent: [
      c['population'] for c in countries 
      if c['continent'] == continent
    ]
    for continent in continents
  }
  
  def compute_median(pops):
    pops = sorted(pops)
    N = len(pops)
    if N % 2 == 1:
      return pops[(N - 1) // 2]
    else:
      return (pops[N // 2 - 1] + pops[N // 2]) / 2  
   
  return [
    {"continent": continent, "population": compute_median(pops)}
    for continent, pops in populations.items()
  ]'''

functional = replace(prototype, language='python-functional', source=functional_source).load_plan()
functional.execute(task)
functional.save()

In [31]:
# Display the widget for the functional Python program on this task (the recorded output is a CodeWidget).
functional.widget(task)

Output()

CodeWidget(program='{"task": "continent_median_population", "language": "python-functional", "plan": {}, "sour…