In [6]:
%load_ext autoreload
%autoreload 2
from expressiveness_benchmark.types import Plan, Task, Language, SourceRange, Program
from code_widget.example import CodeWidget
from dataclasses import replace
import json
import pandas as pd

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
# CHANGE ME!
TASK_ID = 'continent_median_population'
AUTHOR = 'will'

In [16]:
task = Task(
    id=TASK_ID,
    category="Aggregation",
    name="Median population for each continent",
    description="For each continent, return its name and the median population of its countries.",
    plan=[
        Plan(id="iter", description="For each continent"),
        Plan(id="name", description="Its name"),
        Plan(id="group", description="of its countries"),
        Plan(id="agg", description="Median population")
    ],    
    sample_input={
        "countries": [
            {"name": "USA", "population": 328.0, "continent": "North America"},
            {"name": "USA2", "population": 37.0, "continent": "North America"},
            {"name": "Canada", "population": 37.0, "continent": "North America"},
            {"name": "Ethiopia", "population": 109.0, "continent": "Africa"},
        ]
    },
    sample_output=[
        #{"continent": "North America", "population": 182.5},
        {"continent": "North America", "population": 37.0},
        {"continent": "Africa", "population": 109.0},
    ],
)
task.save()


prototype = Program(
    task=TASK_ID,
    author=AUTHOR,
    language=''    
)

In [18]:
sql = replace(prototype,
    language='sql',
    source='''SELECT continent, AVG(population) as population
FROM
  (SELECT *, 
    row_number() OVER (PARTITION BY continent ORDER BY population) AS rank, 
    count() OVER (PARTITION BY continent) as count
  FROM countries)
WHERE 
  (count % 2 = 1 AND rank = (count + 1) / 2) OR 
  (count % 2 = 0 AND ABS(rank - 0.5 - count / 2) = 0.5)
GROUP BY continent''')
sql.execute(task)
sql.save()

In [24]:
datalog = replace(prototype,
    language='datalog',
    source='''
/* .decl unique_id(Country:symbol, Id:number)    
unique_id(Country, $) :- countries(_, Country, _).

.decl rank(Continent:symbol, Country:symbol, R:number, Population:float)
rank(Continent, Country, 0, Population) :-
  countries(Continent, Country, Population),
  unique_id(Country, Id),
  Population = min P : countries(Continent, _, P),
  Id = min Id2 : { countries(Continent, C, Population), unique_id(C, Id2) }.
  
rank(Continent, Country, R + 1, Population) :-
  countries(Continent, Country, Population),
  unique_id(Country, Id),  
  rank(Continent, _, R, Other_pop),
  Population = min P : { countries(Continent, _, P), P > Other_pop },
  Id_min = min Id2 : { countries(Continent, C, Population), unique_id(C, Id2) }.

continent_median_population(Continent, Median) :-
  countries(Continent, _, _),
  Num_countries = count : countries(Continent, _, _),
  ((Num_countries % 2 = 1, 
    rank(Continent, _, (Num_countries - 1) / 2, Median));
   (Num_countries % 2 = 0,
    rank(Continent, _, Num_countries / 2 - 1, P1),
    rank(Continent, _, Num_countries / 2, P2),
    Median = (P1 + P2) / 2)). */
    
    

    
''')
datalog.execute(task, debug=True)
datalog.save()

Path: /var/folders/6p/3bzglbgn2ts3v16zf_27zq3r0000gn/T/tmpv2sl2fmn
Mismatch between target and actual output.
Target:         continent  population
0         Africa       109.0
1  North America        37.0
Actual:         continent  population
0         Africa         109
1  North America         328


AssertionError: 

In [78]:
pandas = replace(prototype,
    language='python-pandas',
    source='''def continent_median_population(countries):
  return (countries
      .groupby('continent')
      .population.median()
      .reset_index())''')
pandas.execute(task)
pandas.save()

In [79]:
imperative = replace(prototype,
    language='python-imperative',
    source='''def continent_median_population(countries):
  populations = defaultdict(list)
  for country in countries:
    populations[country['continent']].append(country['population'])
  
  output = []  
  for continent, pops in populations.items():
    pops.sort()
    N = len(pops)
    if N % 2 == 1:
      median = pops[(N - 1) // 2]
    else:
      median = (pops[N // 2 - 1] + pops[N // 2]) / 2
    output.append({
      "continent": continent,
      "population": median
    })
    
  return output''')
imperative.execute(task)
imperative.save()

In [80]:
functional = replace(
    prototype,
    language='python-functional',
    source='''def continent_median_population(countries):
  continents = set([c['continent'] for c in countries])
  populations = {
    continent: [c['population'] for c in countries if c['continent'] == continent]
    for continent in continents
  }
  
  def compute_median(pops):
    pops = sorted(pops)
    N = len(pops)
    if N % 2 == 1:
      return pops[(N - 1) // 2]
    else:
      return (pops[N // 2 - 1] + pops[N // 2]) / 2  
   
  return [
    {"continent": continent, "population": compute_median(pops)}
    for continent, pops in populations.items()
  ]''')
functional.execute(task)
functional.save()