In [9]:
%load_ext autoreload
%autoreload 2
from expressiveness_benchmark.types import Plan, Task, Language, SourceRange, Program
from code_widget.example import CodeWidget
from dataclasses import replace
import json
import pandas as pd

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
# Per-notebook settings: edit these two values when starting a new task.
TASK_ID = "continent_median_population"  # used as the Task id in the next cell
AUTHOR = "will"

In [17]:
# Task definition: a human-readable description plus one worked example.
# In the example, North America has two countries, so its median is the mean
# of 328 and 37 (182.5); Africa has a single country, so its median is that
# country's population (109.0).
# NOTE(review): presumably this example is what `<program>.execute(task)`
# checks each implementation against — confirm in expressiveness_benchmark.
task = Task(
    id=TASK_ID,
    description="Get the median population for each continent",
    sample_input={
        "countries": [
            {"name": "USA", "population": 328, "continent": "North America"},
            {"name": "Canada", "population": 37, "continent": "North America"},
            {"name": "Ethiopia", "population": 109, "continent": "Africa"},
        ]
    },
    sample_output=[
        {"continent": "North America", "population": 182.5},
        {"continent": "Africa", "population": 109.0},
    ],
)



In [12]:
# SQL variant of the task, built as a copy of `prototype` with `language` and
# `source` overridden via dataclasses.replace.
# NOTE(review): `prototype` is not defined in any cell visible here — confirm
# it exists before this cell runs on a fresh kernel.
#
# Query outline:
#   1. The inner SELECT numbers each continent's countries by ascending
#      population (`rank`) and attaches the continent's row count (`count`).
#   2. The WHERE clause keeps the single middle row when `count` is odd, or
#      the two rows adjacent to the midpoint when `count` is even.
#   3. AVG over the kept rows, grouped by continent, yields the median.
sql = replace(prototype,
    language='sql',
    source='''
SELECT continent, AVG(population) as population
FROM
  (SELECT *, 
    row_number() OVER (PARTITION BY continent ORDER BY population) AS rank, 
    count() OVER (PARTITION BY continent) as count
  FROM countries)
WHERE 
  (count % 2 = 1 AND rank = (count + 1) / 2) OR 
  (count % 2 = 0 AND ABS(rank - 0.5 - count / 2) = 0.5)
GROUP BY continent
''')
# Run the program against the task, then save it (both methods are defined in
# expressiveness_benchmark; their exact behavior is not visible here).
sql.execute(task)
sql.save()

In [15]:
# Datalog variant of the task. It is constructed here but neither executed nor
# saved: both calls at the bottom of this cell are commented out.
#
# Rule outline:
#   - rank:        assigns rank 1.0 to the minimum-population country of each
#                  continent and r2 + 1.0 to a country following one ranked r2.
#   - near_median: keeps the populations whose rank is the middle one (odd
#                  count) or within 0.5 of the midpoint (even count).
#   - continent_median_population: aggregates near_median per continent.
#
# NOTE(review): the final rule uses `sum p`, so a continent with an even
# number of countries would appear to get the sum of its two middle
# populations rather than their mean — confirm before enabling execution.
# NOTE(review): `prototype` is not defined in any cell visible here.
datalog = replace(prototype,
    language='datalog',
    source='''
.decl rank(continent:symbol, name:symbol, r:float, population:float)
rank(continent, name, r, population) :-
  countries(continent, name, population),
  m = min p : countries(continent, _, p),
  ((population = m, r = 1.0); 
  (rank(continent, other1, r2, p2),
   p2 < population,
   countries(continent, other2, p3),
   !(p3 > p2),
   r = r2 + 1.0)).
   
.decl near_median(continent:symbol, population:float)
near_median(continent, population) :-
  rank(continent, _, r, population),
  c = sum x : { countries(continent, _, _), x = 1.0 },
  ((c % 2 = 1, r = (c + 1) / 2);
  (c % 2 = 0, v = r - 0.5 - c/2, (v = 0.5; v = -0.5))).
  
continent_median_population(continent, median) :-
  countries(continent, _, _),
  median = sum p : near_median(continent, p).
''')
# Disabled: this variant is not run or persisted by this cell.
#datalog.execute(task)
#datalog.save()

In [19]:
# pandas variant of the task: group the rows by `continent`, take the median
# of the `population` column, and reset_index() so `continent` comes back as
# a regular column next to `population`.
# Note: the name `pandas` is bound here to the object returned by replace();
# the library itself is imported as `pd` at the top of the notebook.
# NOTE(review): `prototype` is not defined in any cell visible here.
pandas = replace(prototype,
    language='python-pandas',
    source='''
def continent_median_population(countries):
  return (countries
      .groupby('continent')
      .population.median()
      .reset_index())
''')
# Run the program against the task, then save it.
pandas.execute(task)
pandas.save()

In [36]:
# Imperative Python variant of the task: bucket populations by continent,
# sort each bucket in place, then take the middle element (odd length) or the
# mean of the two middle elements (even length), appending one
# {"continent", "population"} record per continent.
# NOTE(review): the source string uses `defaultdict` without importing it, and
# no import of it is visible in this notebook — presumably the execution
# harness supplies it; confirm.
# NOTE(review): `prototype` is not defined in any cell visible here.
imperative = replace(prototype,
    language='python-imperative',
    source='''
def continent_median_population(countries):
  populations = defaultdict(list)
  for country in countries:
    populations[country['continent']].append(country['population'])
  
  output = []  
  for continent, pops in populations.items():
    pops.sort()
    N = len(pops)
    if N % 2 == 1:
      median = pops[(N - 1) // 2]
    else:
      median = (pops[N // 2 - 1] + pops[N // 2]) / 2
    output.append({
      "continent": continent,
      "population": median
    })
    
  return output
''')
# Run the program against the task, then save it.
imperative.execute(task)
imperative.save()

In [35]:
# Functional Python variant of the task: collect the distinct continents,
# build a continent -> list-of-populations dict with comprehensions, and map
# a local compute_median helper over it. The helper sorts a copy of the list
# and returns the middle element (odd length) or the mean of the two middle
# elements (even length).
# NOTE(review): `prototype` is not defined in any cell visible here.
functional = replace(
    prototype,
    language='python-functional',
    source='''
def continent_median_population(countries):
  continents = set([c['continent'] for c in countries])
  populations = {
    continent: [c['population'] for c in countries if c['continent'] == continent]
    for continent in continents
  }
  
  def compute_median(pops):
    pops = sorted(pops)
    N = len(pops)
    if N % 2 == 1:
      return pops[(N - 1) // 2]
    else:
      return (pops[N // 2 - 1] + pops[N // 2]) / 2  
   
  return [
    {"continent": continent, "population": compute_median(pops)}
    for continent, pops in populations.items()
  ]''')
# Run the program against the task, then save it.
functional.execute(task)
functional.save()