In [2]:
%load_ext autoreload
%autoreload 2
from expressiveness_benchmark.types import Plan, Task, Language, SourceRange, Program
from code_widget.example import CodeWidget
from dataclasses import replace
import json
import pandas as pd

In [3]:
# Per-notebook settings -- edit these two values when copying this notebook
# for a new task.
# TASK_ID is used both as the Task id and as the `task` field of every Program.
TASK_ID = "continent_by_population"
# AUTHOR is recorded on every Program built from `prototype`.
AUTHOR = "will"

In [4]:
# Plan steps: each one pairs an id with the phrase of the task description
# that it corresponds to.
plan_steps = [
    Plan(id=step_id, description=phrase)
    for step_id, phrase in [
        ("name", "name of the continent"),
        ("group", "by country"),
        ("average", "average population"),
        ("max", "highest"),
    ]
]

# Sample table: two countries per continent. North America averages
# (328 + 37) / 2 = 182.5 versus Africa's (109 + 51) / 2 = 80, hence the
# expected output below.
sample_countries = [
    {"name": name, "population": population, "continent": continent}
    for name, population, continent in [
        ("USA", 328, "North America"),
        ("Canada", 37, "North America"),
        ("Ethiopia", 109, "Africa"),
        ("Kenya", 51, "Africa"),
    ]
]

task = Task(
    id=TASK_ID,
    category="Aggregation",
    name="Continent with the highest average population",
    description="Find the name of the continent with the highest average population by country.",
    plan=plan_steps,
    sample_input={"countries": sample_countries},
    sample_output="North America",
)
task.save()

# Template Program shared by every solution below: each cell copies it with
# dataclasses.replace and fills in `language` and `source`.
prototype = Program(task=TASK_ID, author=AUTHOR, language='')

In [9]:
# q solution: average population per continent, then take the continent of
# the first row when ordered by descending average.
q_source = '''averages: 
  select avg(population) by continent from countries;
continent_by_population: 
  (first select[>population] continent from averages) `continent'''

q = replace(prototype, language='q', source=q_source)
q = q.load_plan()
# Presumably runs the program against the task's sample data -- confirm in
# expressiveness_benchmark.
q.execute(task)
q.save()

In [10]:
# Show the CodeWidget for the q program; kept as the cell's last expression so
# the notebook renders it.
q.widget(task)

Output()

CodeWidget(program='{"task": "continent_by_population", "language": "q", "plan": {}, "source": "averages: \\n …

In [5]:
# SQL solution: group by continent, order groups by descending average
# population, and keep the top row.
sql_source = '''SELECT continent 
FROM countries
GROUP BY continent
ORDER BY AVG(population) DESC
LIMIT 1'''

sql = replace(prototype, language='sql', source=sql_source)
sql = sql.load_plan()
# Presumably runs the program against the task's sample data -- confirm in
# expressiveness_benchmark.
sql.execute(task)
sql.save()

In [6]:
# Show the CodeWidget for the SQL program; kept as the cell's last expression
# so the notebook renders it.
sql.widget(task)

Output()

CodeWidget(program='{"task": "continent_by_population", "language": "sql", "plan": {"max": [{"line": 3, "start…

In [27]:
# Datalog solution: a helper relation holds each continent's average
# (sum / count), and the answer is the continent whose average equals the
# maximum over all continents.
datalog_source = '''.decl average_population(Continent:symbol, Avg:number)
average_population(Continent, Avg) :-
  countries(Continent, _, _),
  Total = sum P : countries(Continent, _, P),
  Num_countries = count : countries(Continent, _, _),
  Avg = Total / Num_countries.
  
continent_by_population(Continent) :- 
  countries(Continent, _, _), 
  average_population(Continent, Max_avg),
  Max_avg = max A : { countries(C, _, _), average_population(C, A) }.'''

datalog = replace(prototype, language='datalog', source=datalog_source)
datalog = datalog.load_plan()
# Presumably runs the program against the task's sample data -- confirm in
# expressiveness_benchmark.
datalog.execute(task)
datalog.save()

In [28]:
# Show the CodeWidget for the Datalog program; kept as the cell's last
# expression so the notebook renders it.
datalog.widget(task)

Output()

CodeWidget(program='{"task": "continent_by_population", "language": "datalog", "plan": {}, "source": ".decl av…

In [29]:
# pandas solution: mean population per continent, then the index label at the
# position of the largest mean.
pandas_source = '''def continent_by_population(countries):
  mean_pop = countries.groupby('continent').population.mean()
  return mean_pop.index[mean_pop.argmax()]'''

# NOTE: `pandas` here names the Program object; the library itself is imported
# under the alias `pd` at the top of the notebook.
pandas = replace(prototype, language='python-pandas', source=pandas_source)
pandas = pandas.load_plan()
# Presumably runs the program against the task's sample data -- confirm in
# expressiveness_benchmark.
pandas.execute(task)
pandas.save()

In [30]:
# Show the CodeWidget for the pandas program; kept as the cell's last
# expression so the notebook renders it.
pandas.widget(task)

Output()

CodeWidget(program='{"task": "continent_by_population", "language": "python-pandas", "plan": {"group": [{"line…

In [25]:
# Imperative Python solution: one pass accumulates [total, count] per
# continent, a second pass tracks the continent with the largest average.
imperative_source = '''def continent_by_population(countries):
  continent_stats = defaultdict(lambda: [0, 0])
  for country in countries:
    continent = country['continent']
    continent_stats[continent][0] += country['population']
    continent_stats[continent][1] += 1
     
  max_continent = None
  max_average = None
  for continent, [total, count] in continent_stats.items():
    average = total / count
    if max_average is None or max_average < average:
      max_average = average
      max_continent = continent
      
  return max_continent'''

imperative = replace(prototype, language='python-imperative', source=imperative_source)
imperative = imperative.load_plan()
# Presumably runs the program against the task's sample data -- confirm in
# expressiveness_benchmark.
imperative.execute(task)
imperative.save()

In [26]:
# Show the CodeWidget for the imperative Python program; kept as the cell's
# last expression so the notebook renders it.
imperative.widget(task)

Output()

CodeWidget(program='{"task": "continent_by_population", "language": "python-imperative", "plan": {}, "source":…

In [21]:
# Functional Python solution: collect populations per distinct continent,
# map each list to its mean, and take the max by that mean.
functional_source = '''def continent_by_population(countries):
  continents = set([c['continent'] for c in countries])
  populations_by_continent = [
    (continent, [c['population'] for c in countries 
                 if c['continent'] == continent])
    for continent in continents
  ]
  averages = [
    (continent, sum(pops) / len(pops))
    for continent, pops in populations_by_continent
  ]
  return max(averages, key=lambda t: t[1])[0]'''

functional = replace(prototype, language='python-functional', source=functional_source)
functional = functional.load_plan()
# Presumably runs the program against the task's sample data -- confirm in
# expressiveness_benchmark.
functional.execute(task)
functional.save()

In [22]:
# Show the CodeWidget for the functional Python program; kept as the cell's
# last expression so the notebook renders it.
functional.widget(task)

Output()

CodeWidget(program='{"task": "continent_by_population", "language": "python-functional", "plan": {}, "source":…

In [17]:
# R solution: group by continent, summarize to the mean population, slice the
# row with the largest mean, and extract its continent.
r_source = '''continent_by_population <- function(countries) {
  countries %>%
    group_by(continent) %>%
    summarize(mean_pop = mean(population)) %>%
    slice(which.max(mean_pop)) %>%
    .$continent
}'''

r = replace(prototype, language='r', source=r_source)
r = r.load_plan()
# Presumably runs the program against the task's sample data -- confirm in
# expressiveness_benchmark. This step printed a `summarise()` ungrouping
# message when the cell was last run.
r.execute(task)
r.save()

`summarise()` ungrouping output (override with `.groups` argument)


In [18]:
# Show the CodeWidget for the R program; kept as the cell's last expression so
# the notebook renders it.
r.widget(task)

Output()

CodeWidget(program='{"task": "continent_by_population", "language": "r", "plan": {}, "source": "continent_by_p…