In [249]:
%load_ext autoreload
%autoreload 2
from expressiveness_benchmark.types import Program, Task
import pandas as pd
from dataclasses import replace

from code_widget.example import CodeWidget
import json

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [250]:
# CHANGE ME! Set these for each new task notebook.
# TASK_ID is used as the Task's id and as the `task` field of every Program;
# AUTHOR is recorded as the `author` field of every Program.
TASK_ID = 'process_tweets'
AUTHOR = 'scott'

In [252]:
# Plan steps for this task: each step has an id and a short description.
plan_steps = [
    {"id": "filter", "description": "select english non-retweets"},
    {"id": "lowercase", "description": "lower-cased"},
    {"id": "select", "description": "return timestamp and body"},
]

# Sample tweets: rows 1 and 3 are English non-retweets; row 2 is a retweet
# and row 4 is French. Note that is_retweet is stored as the strings
# "true"/"false", not as booleans.
sample_tweets = [
    {"language": "en", "is_retweet": "false", "likes": 8, "body": "Some Text", "ts": 1604534320 },
    {"language": "en", "is_retweet": "true", "likes": 8, "body": "some Text", "ts": 1604534321 },
    {"language": "en", "is_retweet": "false", "likes": 8, "body": "some Text", "ts": 1604534322 },
    {"language": "fr", "is_retweet": "false", "likes": 8, "body": "some Text", "ts": 1604534322 },
]

# Expected result for the sample: only the two English non-retweets, with
# the body lower-cased and only the body/ts fields kept.
expected_rows = [
    { "body": "some text", "ts": 1604534320 },
    { "body": "some text", "ts": 1604534322 },
]

task = Task(
    id=TASK_ID,
    name="process tweets",
    description="select english non-retweets, return timestamp and body, lower-cased",
    category="Basic",
    plan=plan_steps,
    sample_input={"data": sample_tweets},
    sample_output=expected_rows,
)
task.save()
# Template Program for this task and author. Each solution in this notebook
# is derived from it with dataclasses.replace(), filling in the language and
# source; the language is intentionally left empty here.
prototype = Program(task=TASK_ID, author=AUTHOR, language='')

In [246]:
# Imperative Python solution: loop over the rows, keep the English
# non-retweets, and append a dict holding the lower-cased body and timestamp.
imperative_source = '''def process_tweets(data):
  result = []
  for value in data:
    if (value["language"] == "en" and
        value["is_retweet"] == "false"):
      result.append({
        "body": value["body"].lower(),
        "ts": value["ts"]
      })
  return result'''

python_imperative = replace(
    prototype,
    language='python-imperative',
    implementation='',
    source=imperative_source,
).load_plan()  # presumably attaches previously saved plan annotations -- TODO confirm
python_imperative.execute(task)
python_imperative.save()

In [226]:
# Functional Python solution: a single list comprehension that filters the
# English non-retweets and builds the {body, ts} dicts in one expression.
functional_source = '''def process_tweets(data):
  return [
    {"body": value["body"].lower(),
     "ts": value["ts"]}
    for value in data
    if value["language"] == "en" and
       value["is_retweet"] == "false" 
  ]'''

python_functional = replace(
    prototype,
    language='python-functional',
    implementation='',
    source=functional_source,
).load_plan()  # presumably attaches previously saved plan annotations -- TODO confirm
python_functional.execute(task)
python_functional.save()

In [198]:
# Datalog is intentionally skipped for this task: it is missing the string
# primitives (e.g. lower-casing) that the task requires.

In [231]:
# pandas solution: boolean-mask the English non-retweets, lower-case the body
# column, and project down to the body/ts columns.
#
# The lower-casing uses DataFrame.assign() rather than assigning to
# `result.body` directly: `result` is a boolean-filtered slice of `data`, and
# setting a column on such a slice raises SettingWithCopyWarning on pandas
# versions without Copy-on-Write. The vectorised `.str.lower()` accessor
# replaces the per-element `apply(lambda ...)`.
# The embedded program keeps the same number of lines as before.
python_pandas = replace(prototype,
    language='python-pandas',
    implementation='',
    source='''def process_tweets(data):
  result = data[
    (data.language == 'en') &
    (data.is_retweet == 'false')]
  result = result.assign(body=result.body.str.lower())
  return result[["body", "ts"]]''').load_plan()
python_pandas.execute(task)
python_pandas.save()

In [237]:
# SQL solution: filter, lower-case and project in a single SELECT.
#
# String literals are written with single quotes. In standard SQL, double
# quotes delimit identifiers; some engines (e.g. SQLite) accept "en" as a
# string only as a legacy fallback when no such column exists.
# The quote characters are swapped one-for-one, so the query's length and
# character positions are unchanged. The Python string uses """ delimiters
# so the query's final single quote does not collide with the terminator.
sql = replace(prototype,
    language='sql',
    source="""SELECT LOWER(body) as body, ts
FROM data
WHERE language = 'en' and is_retweet = 'false'""").load_plan()
sql.execute(task)
sql.save()

In [239]:
# Display the CodeWidget for the SQL solution.
sql.widget(task)

Output()

CodeWidget(program='{"task": "process_tweets", "language": "sql", "plan": {"lowercase": [{"line": 0, "start": …

In [234]:
# Display the CodeWidget for the functional Python solution.
# NOTE(review): the rendered output below shows an empty plan ({}) for this
# program -- its plan annotations appear not to have been recorded yet.
python_functional.widget(task)

Output()

CodeWidget(program='{"task": "process_tweets", "language": "python-functional", "plan": {}, "source": "def pro…

In [247]:
# Display the CodeWidget for the imperative Python solution.
python_imperative.widget(task)

Output()

CodeWidget(program='{"task": "process_tweets", "language": "python-imperative", "plan": {"filter": [{"line": 2…

In [236]:
# Display the CodeWidget for the pandas solution.
# NOTE(review): the rendered output below shows an empty plan ({}) for this
# program -- its plan annotations appear not to have been recorded yet.
python_pandas.widget(task)

Output()

CodeWidget(program='{"task": "process_tweets", "language": "python-pandas", "plan": {}, "source": "def process…