In [202]:
%load_ext autoreload
%autoreload 2
from expressiveness_benchmark.types import Program, Task
import pandas as pd
from dataclasses import replace

from code_widget.example import CodeWidget
import json

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [203]:
# Notebook configuration -- edit these two values when adapting this notebook.
# AUTHOR is recorded on every Program built below; TASK_ID is used both as the
# Task id and as the Program.task reference.
AUTHOR = "scott"
TASK_ID = "process_tweets"

In [205]:
# Benchmark task definition: a three-step plan plus one sample input/output
# pair. Note that is_retweet is stored as the strings "true"/"false", not as
# booleans, so every implementation below compares against "false".
task = Task(
    id=TASK_ID,
    name="process tweets",
    description="select english tweets that are not retweets, return timestamp and lower-cased tweet contents",
    category="Method Chaining",
    plan=[
        {"id": "filter", "description": "select english non-retweets"},
        {"id": "lowercase", "description": "lowercase tweet text"},
        {"id": "select", "description": "select ts and body"},
    ],
    sample_input={
        "data": [
            # kept: English, not a retweet (body needs lower-casing)
            {"language": "en", "is_retweet": "false", "likes": 8, "body": "Some Text", "ts": 1604534320},
            # dropped: retweet
            {"language": "en", "is_retweet": "true", "likes": 8, "body": "some Text", "ts": 1604534321},
            # kept: English, not a retweet
            {"language": "en", "is_retweet": "false", "likes": 8, "body": "some Text", "ts": 1604534322},
            # dropped: not English
            {"language": "fr", "is_retweet": "false", "likes": 8, "body": "some Text", "ts": 1604534322},
        ],
    },
    sample_output=[
        {"body": "some text", "ts": 1604534320},
        {"body": "some text", "ts": 1604534322},
    ],
)
# NOTE(review): save() presumably persists the task definition -- confirm in
# expressiveness_benchmark.types.
task.save()
# Template Program shared by the implementation cells below: each one copies
# it with dataclasses.replace() and fills in its own language and source.
prototype = Program(task=TASK_ID, author=AUTHOR, language='')

In [196]:
# Imperative Python solution: an explicit loop that filters the tweets and
# appends the projected rows to a result list.
#
# The program text is a raw string (r'''...''') because it contains a
# backslash line continuation. In a normal string literal Python removes a
# backslash-newline pair, which would silently join the two lines of the `if`
# condition in the stored source instead of keeping them as written.
python_imperative = replace(prototype,
    language='python-imperative',
    implementation='',
    source=r'''def process_tweets(data):
    result = []
    for value in data:
        if value["language"] == "en" and \
           value["is_retweet"] == "false":
            result.append({"body": value["body"].lower(), "ts": value["ts"]})
    return result''').load_plan()
# NOTE(review): execute(task) presumably runs the program against the task's
# sample data -- confirm in expressiveness_benchmark.types.
python_imperative.execute(task)
python_imperative.save()

In [197]:
# Functional Python solution: a single list comprehension that filters the
# tweets and projects each kept one to {"body", "ts"}.
# The program text below is kept verbatim (including its whitespace).
python_functional = replace(
    prototype,
    language='python-functional',
    implementation='',
    source='''def process_tweets(data):
    return [{
        "body": value["body"].lower(),
        "ts": value["ts"]}
      for value in data
      if value["language"] == "en" and value["is_retweet"] == "false" 
    ]''',
).load_plan()
python_functional.execute(task)
python_functional.save()

In [198]:
# No Datalog implementation for this task: Datalog is missing the string primitives it would need.

In [199]:
# pandas solution: boolean-mask filter, lower-case the body column, then
# select the two output columns.
#
# The body column is rewritten with DataFrame.assign rather than by assigning
# to result.body: `result` is a filtered selection of `data`, and writing to
# it in place triggers pandas' SettingWithCopyWarning. Series.str.lower() is
# the built-in string accessor that replaces apply(lambda s: s.lower()).
python_pandas = replace(prototype,
    language='python-pandas',
    implementation='',
    source='''def process_tweets(data):
    result = data[(data.language == 'en') & (data.is_retweet == 'false')]
    result = result.assign(body=result.body.str.lower())
    return result[["body", "ts"]]''').load_plan()
python_pandas.execute(task)
python_pandas.save()

In [200]:
# SQL solution: filter in WHERE, lower-case and project in SELECT.
#
# String constants use single quotes. In standard SQL double quotes delimit
# identifiers, so "en" / "false" are only read as strings by engines that
# fall back to that interpretation when no such column exists.
# The Python literal is delimited with """ so that the trailing 'false' does
# not run into the closing quotes.
sql = replace(prototype,
    language='sql',
    source="""SELECT LOWER(body) as body, ts
FROM data
WHERE language = 'en' and is_retweet = 'false'""").load_plan()
sql.execute(task)
sql.save()