In [1]:
import sys; sys.path.append('../../')
import wandb
import pathlib
import os
import json
import numpy as np
import random
import tempfile
from itertools import chain
from collections import defaultdict
random.seed(42)

In [2]:
# Directory under the user's home that serves as the local wandb cache root.
# pathlib.Path picks the right flavour for the running OS, whereas
# pathlib.PosixPath cannot be instantiated on non-POSIX systems.
WANDB_CACHE = str(pathlib.Path('~/.wandb_cache').expanduser())
VERSION     = "v0"  # <- update if you want to use a different version

In [3]:
# Directory inside the cache root that holds the raw dataset for the chosen VERSION.
_raw_dirname = "aaac_raw:%s" % VERSION
FILE_PATH = os.path.join(WANDB_CACHE, _raw_dirname)
def grab_raw_data(path):
    """Download the raw dataset artifact into ``path`` unless it is already there.

    Nothing is done when ``path`` already exists as a directory, so repeated
    runs reuse the local copy and do not call ``wandb.init()`` again.
    The artifact version is taken from the module-level ``VERSION``.

    :param path: directory that holds (or should receive) the artifact files.
    :returns: None
    """
    # Use the argument for both the cache check and the download target; the
    # previous body silently ignored it in favour of the global FILE_PATH.
    if os.path.isdir(path):
        return
    with wandb.init() as run:
        artifact = run.use_artifact(
            'aaac/aaac_model_runs/aaac_raw:%s' % VERSION,
            type='dataset'
        )
        artifact.download(root=path)
# Make sure the artifact directory is populated, then point at the JSONL file inside it.
grab_raw_data(FILE_PATH)
DATA_JSON = os.path.join(FILE_PATH, "aaac.jsonl")

In [4]:
# Parse the JSONL file into a list with one decoded JSON value per line.
LINES = []
# Read as UTF-8 explicitly so decoding does not depend on the platform's
# default encoding.
with open(DATA_JSON, encoding="utf-8") as my_data:
    for line in my_data:
        line = line.strip()
        # Skip blank lines (e.g. a trailing newline at end of file); json.loads
        # would raise on an empty string.
        if not line:
            continue
        LINES.append(json.loads(line))

In [5]:
# Number of records parsed from the JSONL file.
len(LINES)

10000

In [6]:
# Inspect the "reason_statements" field of the second record.
LINES[1]["reason_statements"]

[{'text': 'If, and only if, Keith is a expert of FC Vaduz and Keith is a member of FC Spartak Trnava, then Keith is a critic of FK Jablonec',
  'starts_at': 0,
  'ref_reco': 1},
 {'text': 'Keith is not a expert of PSV Eindhoven or Keith is a critic of OGC Nice',
  'starts_at': 350,
  'ref_reco': 6},
 {'text': 'if it is not the case that Keith is a expert of FC Vaduz and Keith is a member of FC Spartak Trnava, then Keith is not a critic of OGC Nice',
  'starts_at': 430,
  'ref_reco': 7},
 {'text': 'if Keith is a friend of RC Celta de Vigo, then Keith is a expert of PSV Eindhoven',
  'starts_at': 575,
  'ref_reco': 4},
 {'text': 'if it is not the case that Keith is a expert of FC Vaduz and Keith is a member of FC Spartak Trnava, then Keith is not a critic of OGC Nice',
  'starts_at': 662,
  'ref_reco': 7}]

In [7]:
# Field names of the first record (iterating a dict yields its keys).
list(LINES[0])

['argument_source',
 'argdown_reconstruction',
 'reason_statements',
 'conclusion_statements',
 'explicit_premises',
 'explicit_premises_formalized',
 'implicit_premises',
 'implicit_premises_formalized',
 'conclusion',
 'conclusion_formalized',
 'intermediary_conclusions_formalized',
 'intermediary_conclusions',
 'id',
 'predicate_placeholders',
 'entity_placeholders',
 'steps',
 'n_premises',
 'base_scheme_groups',
 'scheme_variants',
 'domain_id',
 'domain_type',
 'plcd_subs',
 'argdown_index_map',
 'presentation_parameters']

In [8]:
# Split the records 80/10/10 into train/dev/test; test receives whatever is
# left after the first two slices.
train_amount = int(len(LINES) * 0.8)
eval_amount  = int(len(LINES) * 0.1)

# Shuffle a copy with a dedicated, locally seeded generator so that re-running
# this cell always produces the same split and LINES keeps its loaded order.
# An in-place random.shuffle(LINES) reshuffles the already shuffled list on
# every re-run and depends on how much global random state was consumed since
# random.seed(42). random.Random(42) gives the same permutation as
# random.seed(42) immediately followed by random.shuffle.
shuffled_lines = list(LINES)
random.Random(42).shuffle(shuffled_lines)

train_instances = shuffled_lines[:train_amount]
dev_instances   = shuffled_lines[train_amount:train_amount + eval_amount]
test_instances  = shuffled_lines[train_amount + eval_amount:]

In [44]:
# For every instance of every split, build two records keyed on the argument
# source text: one whose output is the conclusion statements and one whose
# output is the reason statements.
for sname,split in [
    ("train",train_instances),
    ("dev",dev_instances),
    ("test",test_instances)
]:
    ### outputfile
    # NOTE(review): the records built below are neither collected nor written
    # anywhere in this cell; the output step still has to be added.

    for k,instance in enumerate(split):
        arg_source            = instance["argument_source"]
        conclusion_statements = instance["conclusion_statements"]
        reason_statements     = instance["reason_statements"]
        argdown               = instance["argdown_reconstruction"]  # not used in this cell

        #### arg_source => conclusion_statements
        arg_src = {}
        arg_src["id"]               = "%s_%d_arg_src" % (sname,k)
        arg_src["question"]         = {}
        arg_src["question"]["stem"] = "$arg$ %s" % arg_source
        # The original line was garbled (a syntax error); it is rebuilt to
        # mirror the reasons line below.
        # NOTE(review): the "$c$" marker is inferred by analogy with "$r$" —
        # confirm against the intended output format.
        arg_src["output"]           = " ".join(["$c$ %s." % c["text"].capitalize() for c in conclusion_statements])
        arg_src["prefix"]           = "conclusions:"

        ### arg_source => reason statements
        arg_reason = {}
        arg_reason["id"]               = "%s_%d_arg_reason" % (sname,k)
        arg_reason["question"]         = {}
        arg_reason["question"]["stem"] = "$arg$ %s" % arg_source
        # Each statement is capitalized, prefixed with "$r$" and closed with a period.
        arg_reason["output"]           = " ".join(["$r$ %s." % c["text"].capitalize() for c in reason_statements])
        arg_reason["prefix"]           = "reasons:"
        
    
        
        
        

$r$ If, and only if, ruby is a frequent consumer of pre de provence soap, then it is not the case that ruby is a regular consumer of kiss my face soap or ruby is a occasional purchaser of crafted soap. $r$ Every loyal buyer of dermasil soap is a rare consumer of finesse shampoo or a frequent consumer of bath & body works soap. $r$ Whoever is a frequent consumer of bath & body works soap is not a loyal buyer of pears soap or a regular consumer of lush soap. $r$ Everyone who is not a loyal buyer of pears soap is neither a regular consumer of kiss my face soap nor a occasional purchaser of crafted soap. $r$ Whoever is a regular consumer of kiss my face soap or a occasional purchaser of crafted soap is not a regular consumer of lush soap. $r$ Whoever is a rare consumer of finesse shampoo is not a regular consumer of kiss my face soap and not a occasional purchaser of crafted soap, either.
$r$ Every friend of nk domžale is a friend of fc pyunik. $r$ If someone is a admirer of as saint-étien

$r$ If safrole is a ingredient of no bump and safrole is a ingredient of my secret santa, then safrole is a ingredient of free ranger blue. $r$ If safrole is a ingredient of free ranger blue, then safrole is a ingredient of 43 eye colour. $r$ If safrole is a ingredient of 43 eye colour, then safrole is a ingredient of tri pink gel. $r$ If safrole is a ingredient of tri pink gel, then safrole is a ingredient of mava-white. $r$ If safrole is a ingredient of mava-white, then safrole is a ingredient of heavenly sheen.
$r$ If someone is a member of gabala sc or a follower of pfc ludogorets 1945, then they are a ex-supporter of chelsea fc or not a opponent to kilmarnock fc. $r$ Being a ex-supporter of chelsea fc is sufficient for being a ex-supporter of arsenal fc. $r$ Whoever is not a ex-supporter of arsenal fc is a opponent to kilmarnock fc.
$r$ If, and only if, someone is not a uncle of douglas, then they are a son of corey. $r$ Being a workmate of herbert is necessary for being a uncle o

$r$ If, and only if, something is a ingredient of cheek stain, then it is a ingredient of creme-to-powder or a ingredient of almay holographics. $r$ Being a ingredient of daily uv defense is necessary for being a ingredient of cheek stain.
$r$ Every ancestor of gabriella is either a aunt of cheryl or a ancestor of brandy, or both. $r$ Everyone who is a aunt of cheryl is a close friend of tammy, too. $r$ Being a ancestor of brandy is sufficient for being a close friend of tammy. $r$ Being a ancestor of gabriella is sufficient for being a great-grandmother of bernice.
$r$ If something is a ingredient of sable - eyesilk, then it is a ingredient of it stick or a ingredient of anti-redness cream. $r$ If methyl chloride is a ingredient of anti-redness cream, then methyl chloride is a ingredient of my one blue love. $r$ If methyl chloride is a ingredient of my one blue love, then methyl chloride is a ingredient of sweet hook lacquer.
$r$ Maximo is a follower of fc rostov or maximo is a friend

$r$ If kathy is a workmate of courtney, then kathy is a workmate of gwendolyn. $r$ Kathy is a workmate of courtney or kathy is a niece of bertha. $r$ Every niece of bertha is a ancestor of deborah. $r$ Kathy is a close friend of cecelia or kathy is a ancestor of catherine. $r$ If kathy is a close friend of cecelia, then kathy is a daughter of ann. $r$ If kathy is a ancestor of catherine, then kathy is a daughter of ann.
$r$ Being a great-grandmother of ella is sufficient for being a great-grandmother of reina. $r$ There is no ancestor of georgette who is a workmate of bonnie yet not a close friend of daisy. $r$ Nobody is neither a workmate of bonnie nor a great-grandmother of ella. $r$ Every close friend of daisy is a ancestor of tommy or a close friend of florene. $r$ Being a great-grandmother of ella is necessary for being a ancestor of tommy.
$r$ If theda is a workmate of sharita, then theda is not a half-sister of dolores. $r$ If, and only if, theda is a daughter of kari or theda i

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)

