In [None]:
! pip install --user --quiet psycopg2-binary
! pip install --user --quiet records
! pip install --user --quiet cockroachdb
! pip install --user --quiet toolz

In [56]:
import os

# Connection settings for the chat database. They are exported as environment
# variables because conn_string() reads them back through os.getenv().
os.environ.update({
    'CHATBASE_DATABASE': "chatroach",
    'CHATBASE_USER': "chatroach",
    'CHATBASE_PASSWORD': "",
    'CHATBASE_HOST': "localhost",
    'CHATBASE_PORT': "5432",
})

In [66]:
# Pause threshold, in hours, handed to get_blocked().
BAILER_HOURS = 12

# Number of days added to a user's first marker time by get_bailouts().
BAILER_DAYS = 5

# Language code -> form name sent with the bailout event.
FORMS = {
    "eng": "bailout_eng",
    "hindi": "bailout_hindi",
}

# Base URL of the bot server that receives the synthetic events.
BOTSERVER_URL = "http://localhost:5000"

# Page id sent as the `page` field of every bailout event.
FB_PAGE_ID = "935593143497601"

In [115]:
import json
import time
import datetime
import os
import records
import pandas as pd
import numpy as np
import requests
from toolz import dissoc, get_in

def get_ref(d):
    """Return the ``ref`` entry of a message's JSON-encoded metadata.

    Looks up ``d['message']['metadata']``. When that value is missing or
    falsy the function returns ``None``; otherwise the value is parsed as
    JSON and its ``'ref'`` entry is returned (``None`` if there is none).
    """
    metadata = get_in(['message', 'metadata'], d)
    if not metadata:
        return None
    parsed = json.loads(metadata)
    return parsed.get('ref')

def get_df(conn_string):
    """Load the ``messages`` table into a flat DataFrame, one row per message.

    Each database row's ``content`` column is parsed as JSON and merged with
    the row's ``userid``. Three derived columns are added:

    * ``text``       -- ``content['message']['text']`` (None when absent)
    * ``ref``        -- the ``ref`` found in the message metadata, see get_ref()
    * ``event_type`` -- ``content['event']['type']`` (None when absent)

    The nested/unused keys ``recipient``, ``sender``, ``message``,
    ``referral``, ``user``, ``page``, ``postback``, ``event`` and ``data`` are
    dropped from the result.

    Args:
        conn_string: database URL passed to ``records.Database``.

    Returns:
        pandas.DataFrame built from the flattened message dicts.
    """
    # The context manager closes the database once loading is done; the
    # original left it open for the lifetime of the kernel.
    with records.Database(conn_string) as db:
        rows = (r.as_dict() for r in db.query('select * from messages'))
        dat = ({**json.loads(r['content']), 'userid': r['userid']} for r in rows)
        dat = (dissoc(d, 'recipient', 'sender') for d in dat)
        # All derived columns are computed in one pass, so the metadata JSON
        # is parsed once per message instead of twice.
        dat = ({**d,
                'text': get_in(['message', 'text'], d),
                'ref': get_ref(d),
                'event_type': get_in(['event', 'type'], d)} for d in dat)
        dat = (dissoc(d, 'message', 'referral', 'user', 'page', 'postback', 'event', 'data') for d in dat)
        # The generators above are lazy: materialise them before the database
        # is closed on leaving the `with` block.
        return pd.DataFrame(list(dat))

def current_timestamp():
    """Return the current time as integer milliseconds since the Unix epoch.

    Reads the epoch clock directly instead of converting a naive local
    ``datetime`` back with ``mktime``, which is ambiguous while local clocks
    are set back for DST and throws away the sub-second part.
    """
    return int(time.time() * 1000)

def get_times(df):
    """Summarise the rows of ``df`` around its 'QQQ'-tagged messages.

    The rows are ordered by ``timestamp`` and the ones whose ``ref`` equals
    'QQQ' are located. The result is a one-row DataFrame with:

    * ``userid`` / ``lang`` -- taken from the first row after ordering
    * ``time``              -- timestamp of the first 'QQQ' row
    * ``pause``             -- timestamp of the row following the last 'QQQ'
                               row minus that row's own timestamp; when no
                               row follows, current_timestamp() is used
    * ``continuation``      -- number of rows from the last 'QQQ' row
                               (inclusive) to the end

    Raises:
        IndexError: if no row has ``ref == 'QQQ'``.
    """
    ordered = df.sort_values('timestamp')
    is_marker = (ordered.ref == 'QQQ').values
    marker_positions = np.argwhere(is_marker).ravel()

    first_pos = marker_positions[0]
    last_pos = marker_positions[-1]
    started_at = ordered.iloc[first_pos]['timestamp']
    last_seen = ordered.iloc[last_pos]['timestamp']

    following_pos = last_pos + 1
    if following_pos < len(ordered):
        next_seen = ordered.iloc[following_pos]['timestamp']
    else:
        # Nothing came after the last marker: measure the pause up to now.
        next_seen = current_timestamp()

    summary = {
        'userid': ordered.userid.iloc[0],
        'lang': ordered.lang.iloc[0],
        'time': started_at,
        'pause': next_seen - last_seen,
        'continuation': len(ordered.iloc[last_pos:]),
    }
    return pd.DataFrame([summary])


def get_blocked(df, hours, max_continuation=6):
    """Find users who went quiet after receiving one of the marker messages.

    Steps:

    1. Drop every user that already has a ``bailout`` event.
    2. Keep only ``messenger`` rows that have text.
    3. Tag rows whose text contains the English or the Hindi marker phrase
       with ``ref = 'QQQ'`` and record the language per user.
    4. Run get_times() per affected user and keep users whose pause after the
       last marker exceeds ``hours`` and who have fewer than
       ``max_continuation`` rows from the last marker onward.

    A user matching both phrases is treated as 'eng' and their rows are
    counted once.

    Args:
        df: message frame with at least the columns ``userid``, ``timestamp``,
            ``source``, ``text`` and ``event_type``. It is not modified.
        hours: minimum pause, in hours, for a user to count as blocked.
        max_continuation: exclusive upper bound on the number of rows from
            the last marker row onward.

    Returns:
        DataFrame with columns ``userid``, ``lang``, ``time``, ``pause`` and
        ``continuation``; empty (with those columns) when nobody qualifies
        for lack of marker messages.
    """
    result_columns = ['userid', 'lang', 'time', 'pause', 'continuation']
    eng_marker = 'Hey! Have a look at these videos'
    hindi_marker = 'Kripaya is sandesh ko Messenger par apne mitron ko bhejen'

    previously_bailed = df.loc[df['event_type'] == 'bailout', 'userid'].unique()
    msgs = df[~df.userid.isin(previously_bailed)]
    msgs = msgs[(msgs.source == 'messenger') & msgs.text.notna()].reset_index(drop=True)
    if msgs.empty:
        # Nothing left to inspect; also avoids using .str on an empty column.
        return pd.DataFrame(columns=result_columns)

    # The markers are literal phrases, so match them as plain substrings;
    # na=False makes non-string values count as "no match".
    engs = msgs.text.str.contains(eng_marker, regex=False, na=False)
    hindis = msgs.text.str.contains(hindi_marker, regex=False, na=False)
    msgs.loc[engs | hindis, 'ref'] = 'QQQ'

    # One language per user. 'eng' is written last so it wins for users who
    # received both markers; previously such users had every row duplicated.
    lang_by_user = dict.fromkeys(msgs.loc[hindis, 'userid'], 'hindi')
    lang_by_user.update(dict.fromkeys(msgs.loc[engs, 'userid'], 'eng'))

    affected = msgs[msgs.userid.isin(list(lang_by_user))]
    if affected.empty:
        # No marker messages at all: return a well-formed empty result
        # instead of failing on the missing `pause` column.
        return pd.DataFrame(columns=result_columns)

    affected = (affected
                .assign(lang=affected.userid.map(lang_by_user))
                .sort_values(['userid', 'timestamp']))

    # Iterate the groups explicitly so get_times() always receives the
    # `userid` column, whatever groupby.apply does with grouping columns.
    pauses = pd.concat(
        [get_times(group) for _, group in affected.groupby('userid')],
        ignore_index=True,
    )

    min_pause_ms = 1000 * 60 * 60 * hours
    is_blocked = (pauses.pause > min_pause_ms) & (pauses.continuation < max_continuation)
    return pauses[is_blocked].reset_index(drop=True)

def get_bailouts(blocked, days):
    """Return the rows of ``blocked`` whose bailout time has already passed.

    The bailout time of a row is its ``time`` value (milliseconds since the
    epoch, converted with ``datetime.fromtimestamp``) plus ``days`` days.

    Note: the computed ``bailout_time`` column is added to ``blocked`` itself,
    so the caller's frame is modified in place.

    Args:
        blocked: frame with a ``time`` column in epoch milliseconds.
        days: number of days to wait after ``time`` before bailing out.

    Returns:
        The subset of ``blocked`` with ``bailout_time`` earlier than now.
    """
    def _as_datetime(millis):
        return datetime.datetime.fromtimestamp(millis / 1000)

    delay = datetime.timedelta(days=days)
    blocked['bailout_time'] = blocked.time.map(_as_datetime) + delay

    now = datetime.datetime.now()
    is_due = blocked['bailout_time'] < now
    return blocked[is_due]

def conn_string():
    """Build the ``cockroachdb://`` database URL from the environment.

    Reads ``CHATBASE_USER``, ``CHATBASE_PASSWORD``, ``CHATBASE_DATABASE``,
    ``CHATBASE_HOST`` and ``CHATBASE_PORT``. The user and password are
    percent-encoded so characters such as ``@``, ``/`` or ``:`` cannot break
    the URL. An unset password is treated as empty.

    Returns:
        str: ``cockroachdb://user:password@host:port/database``.

    Raises:
        KeyError: if the user, database, host or port variable is unset
            (previously the literal text "None" ended up in the URL).
    """
    from urllib.parse import quote

    required = ('CHATBASE_USER', 'CHATBASE_DATABASE', 'CHATBASE_HOST', 'CHATBASE_PORT')
    missing = [name for name in required if os.getenv(name) is None]
    if missing:
        raise KeyError('missing environment variable(s): ' + ', '.join(missing))

    # safe='' so that even '/' inside a credential is encoded.
    user = quote(os.environ['CHATBASE_USER'], safe='')
    password = quote(os.getenv('CHATBASE_PASSWORD', ''), safe='')
    db = os.environ['CHATBASE_DATABASE']
    host = os.environ['CHATBASE_HOST']
    port = os.environ['CHATBASE_PORT']
    return f'cockroachdb://{user}:{password}@{host}:{port}/{db}'


def _bail(page, user, form):
    """Build the payload of a synthetic ``bailout`` event.

    Args:
        page: value stored under ``'page'``.
        user: value stored under ``'user'``.
        form: form name stored under ``event['value']['form']``.

    Returns:
        dict with the keys ``event``, ``user`` and ``page``.
    """
    event = dict(type='bailout', value=dict(form=form))
    return dict(event=event, user=user, page=page)

def bailout(user, lang, timeout=10):
    """Send a synthetic ``bailout`` event for ``user`` to the bot server.

    The form is looked up in ``FORMS`` by language and the event is POSTed as
    JSON to ``{BOTSERVER_URL}/synthetic`` for page ``FB_PAGE_ID``.

    Args:
        user: user id to bail out.
        lang: language key into ``FORMS``.
        timeout: seconds to wait for the bot server before giving up. Without
            a timeout an unresponsive server would block the call forever.

    Returns:
        The ``requests`` response object; the status code is not checked
        here.

    Raises:
        KeyError: if ``lang`` is not a key of ``FORMS``.
        requests.exceptions.RequestException: on connection errors or when
            the timeout expires.
    """
    form = FORMS[lang]
    payload = _bail(FB_PAGE_ID, user, form)
    return requests.post(f'{BOTSERVER_URL}/synthetic', json=payload, timeout=timeout)

In [116]:
# Load every stored message into a flat DataFrame.
df = get_df(conn_string())
# Users whose pause after the last marker message exceeds BAILER_HOURS.
blocked = get_blocked(df, BAILER_HOURS)
# Of those, the users whose first marker time plus BAILER_DAYS has passed.
# Note: get_bailouts() also adds a `bailout_time` column to `blocked`.
bails = get_bailouts(blocked, BAILER_DAYS)