# ROCK: Reasoning About Commonsense Causality

Use this notebook to perform commonsense causality reasoning (CCR) with ROCK.

In [2]:
from __future__ import print_function, absolute_import, division

%load_ext autoreload
%autoreload 2
%matplotlib widget

import sys, os, json, time, datetime, logging, warnings, multiprocessing, itertools
import tqdm, json, sqlite3, ast
from pathlib import Path
import pandas as pd
import numpy as np
import torch
import nltk, spacy
import transformers, allennlp
from transformers import (AutoTokenizer, AutoModelForMaskedLM,
                          RobertaModel,RobertaForMaskedLM, 
                          RobertaTokenizer, GPT2LMHeadModel, GPT2Tokenizer)
import allennlp_models
import allennlp_models.pretrained

In [6]:
import src
import src.pipeline
import src.utils as utils


In [8]:
# Report whether a GPU is visible, then pick the device used by the models below:
# the first CUDA device when available, otherwise the CPU.
print(torch.cuda.is_available())
if torch.cuda.is_available():
    TORCH_DEV = torch.device('cuda:0')
else:
    TORCH_DEV = torch.device("cpu")

# Keep the notebook output readable: mute the chattiest third-party loggers ...
logging.getLogger('allennlp.common.params').disabled = True
logging.getLogger('allennlp.nn.initializers').disabled = True
logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)
logging.getLogger('urllib3.connectionpool').disabled = True
# ... raise the root logger threshold, drop Python warnings, and finally
# disable every logging call at or below `sys.maxsize` (i.e. all of them).
logging.getLogger().setLevel(logging.CRITICAL)
warnings.filterwarnings('ignore')
logging.disable(sys.maxsize)

True


In [9]:
def console_log(msg, end='\n'):
    """Write a log line straight to file descriptor 1 (stdout).

    The message is prefixed with ``[LOG/<process name>]`` so that output coming
    from different worker processes can be told apart.

    Args:
        msg: Text to log.
        end: Terminator appended after ``msg`` (newline by default).
    """
    process_name = multiprocessing.current_process().name
    line = '[LOG/{}]'.format(process_name) + msg + end
    # os.write bypasses Python-level stdout and writes to the raw descriptor.
    os.write(1, line.encode('utf-8'))


def col_print(*args, cw=12, sep='|'):
    """Print the given values as one row of left-aligned, fixed-width columns.

    Args:
        *args: Values to print, one per column.
        cw: Minimum column width; shorter values are padded on the right.
        sep: Separator placed (surrounded by single spaces) between columns.
    """
    cell_template = '{' + f":<{cw}" + '}'
    cells = [cell_template.format(value) for value in args]
    print(f" {sep} ".join(cells))
    
def set_seed(seed):
    """Seed the NumPy and PyTorch random number generators with ``seed``.

    When CUDA is available, the generators of all CUDA devices are seeded too.

    Args:
        seed: Integer seed shared by every generator.
    """
    np.random.seed(seed)
    torch.manual_seed(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)
    
def set_ts_seed():
    """Seed the random number generators from the current wall-clock time.

    The current timestamp is rendered as a string, its decimal point is removed,
    and the resulting integer is reduced modulo 2**31 before being handed to
    ``set_seed``.
    """
    stamp = str(datetime.datetime.now().timestamp())
    digits = stamp.replace('.', '')
    set_seed(int(digits) % (2 ** 31))

In [10]:
# Directory holding the datasets read below and the CSV written at the end of the notebook.
DATA_PATH = Path("./exp_data")
# Directory holding local model checkpoints (e.g. the fine-tuned RoBERTa in `roberta_ft`).
MODEL_PATH = Path("./models")

In [11]:
# Seed from the current timestamp, so every run samples differently.
# For a reproducible run, call `set_seed` with a fixed integer instead, e.g.:
# set_seed(hsh('random_string') % (2 ** 31))
# NOTE(review): `hsh` is not defined anywhere in this notebook — supply your own hash function.
set_ts_seed()

## Load Dataset

In [34]:
# COPA dev split: JSON-lines records, indexed by the example id (`idx`).
copa_dev = pd.read_json(DATA_PATH / "copa_dev.json", lines=True, orient='records').set_index('idx')
# COPA test split. The file is JSON, so it must be parsed with `read_json`
# (it was previously passed to `read_csv`).
# NOTE(review): assumes the same JSON-lines layout and `idx` column as the dev split — confirm.
copa_test = pd.read_json(DATA_PATH / "copa_test.json", lines=True, orient='records').set_index('idx')
# Pre-computed GLUCOSE (dimension 1) table, stored as CSV.
glt_d1 = pd.read_csv(DATA_PATH / "glucose_d1_probs.csv")

# ROCK Pipeline

In [13]:
# spaCy pipeline; passed to `utils.copa_get_probs` when the temporal probabilities are computed.
spacy_model = spacy.load('en_core_web_md')
# AllenNLP semantic-role-labelling predictor, wrapped for the intervention generator (`PJGenerator`).
# NOTE(review): `cuda_device=1` targets a second GPU, while TORCH_DEV above is `cuda:0` or CPU —
# adjust this index to match the available hardware.
allensrl = src.pipeline.AllenSRLWrapper(allennlp_models.pretrained.load_predictor("structured-prediction-srl-bert", cuda_device=1))

## Temporal Predictor

As a bare minimum, a customized temporal predictor needs to override the `predict` method.
Below is the implementation we used, which is based on masked language modeling.

```python
class TempPredictor:
    """Temporal predictor built on a masked language model.

    Two event descriptions are joined around the tokenizer's mask token and a
    fill-mask pipeline is asked for the most likely connectives; the scores of
    "before" and "after" are then read off the predictions.
    """

    def __init__(self, model, tokenizer, device, spacy_model="en_core_web_sm"):
        """Move ``model`` to ``device``, put it in eval mode and build the fill-mask pipeline.

        Args:
            model: Masked language model (must support ``.to`` and ``.eval``).
            tokenizer: Tokenizer matching ``model``; its ``mask_token`` is used in prompts.
            device: Device the model is moved to.
            spacy_model: Name of the spaCy model to load; falls back to
                ``"en_core_web_sm"`` if loading fails.
        """
        self._model = model
        self._model.to(device)
        self._model.eval()
        self._tokenizer = tokenizer
        self._mtoken = self._tokenizer.mask_token
        # NOTE(review): the pipeline is pinned to `device=0` regardless of the `device`
        # argument — confirm this is intended (it looks like it would not work on CPU-only hosts).
        self.unmasker = transformers.pipeline("fill-mask", model=self._model, tokenizer=self._tokenizer, device=0)
        try:
            self._spacy = spacy.load(spacy_model)
        except Exception as e:
            # Best effort: fall back to the small English model and report the failure.
            self._spacy = spacy.load("en_core_web_sm")
            print(f"Failed to load spacy model {spacy_model}, use default 'en_core_web_sm'\n{e}")


    def predict(self, e1, e2, top_k=5):
        """Run the fill-mask pipeline on ``"<e1> <mask> <e2>"``.

        ``e1`` is passed through ``_remove_punct`` and ``e2`` through
        ``_sent_lowercase`` (both omitted from this excerpt) before the prompt
        is assembled.

        Returns:
            The output of ``self.unmasker`` for the prompt, limited to ``top_k``
            candidates (presumably one entry per candidate token with its score —
            see the transformers fill-mask pipeline).
        """
        txt = self._remove_punct(e1) + " " + self._mtoken + " " + self._sent_lowercase(e2)
        return self.unmasker(txt, top_k=top_k)


    def get_temp(self, e1, e2, top_k=5, crop=1):
        """Score the temporal order of ``e1`` and ``e2`` in both prompt directions.

        Args:
            e1, e2: Event descriptions.
            top_k: Number of fill-mask candidates requested per prompt.
            crop: Forwarded to ``_extract_token_prob`` (omitted from this excerpt).

        Returns:
            A pair ``(before, after)``: the score that ``e1`` happens before ``e2``
            and the score that ``e1`` happens after ``e2``, each averaged over the
            two prompt orders.
        """
        inst1 = self.predict(e1, e2, top_k)
        inst2 = self.predict(e2, e1, top_k)

        # e1 before e2
        b1 = self._extract_token_prob(inst1, "before", crop=crop)
        b2 = self._extract_token_prob(inst2, "after", crop=crop)

        # e1 after e2
        a1 = self._extract_token_prob(inst1, "after", crop=crop)
        a2 = self._extract_token_prob(inst2, "before", crop=crop)

        return (b1+b2)/2, (a1+a2)/2

    def __call__(self, *args, **kwargs):
        """Shorthand for :meth:`get_temp`."""
        return self.get_temp(*args, **kwargs)
    
    # other methods omitted
```

**NB** Fine-tuned RoBERTa model checkpoint can be downloaded using [this anonymous Dropbox link](https://www.dropbox.com/s/9egrzn1ny3oq2qa/roberta_ft.tar.gz?dl=0) (1.29GB).

In [32]:
# Temporal predictor backed by the fine-tuned checkpoint stored under MODEL_PATH/"roberta_ft".
# It uses the stock `roberta-base` tokenizer.
tp_roberta_ft = src.pipeline.TempPredictor(
    model=RobertaForMaskedLM.from_pretrained(MODEL_PATH/"roberta_ft"),
    tokenizer=RobertaTokenizer.from_pretrained("roberta-base"),
    device=TORCH_DEV
)

# Baseline temporal predictor backed by the pretrained `roberta-base` weights.
tp_roberta_base = src.pipeline.TempPredictor(
    model=RobertaForMaskedLM.from_pretrained("roberta-base"),
    tokenizer=RobertaTokenizer.from_pretrained("roberta-base"),
    device=TORCH_DEV
)

##### Sanity Check

In [35]:
# Compare the base and fine-tuned predictors on a single dev example (positional row 5).
utils.test_copa_run(copa_dev.iloc[5], tp_roberta_base, tp_roberta_ft, top_k=5)

Premise: I doubted the salesman's pitch.
C1: I turned his offer down.
C2: He persuaded me to buy the product.
Question: effect	Correct choice: Choice 1

I doubted the salesman's pitch. <---> I turned his offer down.
Base model:	before: 0.000	after: 0.000
FT model:	before: 0.486	after: 0.514

I doubted the salesman's pitch. <---> He persuaded me to buy the product.
Base model:	before: 0.015	after: 0.000
FT model:	before: 0.488	after: 0.511


## Event Sampler

The event sampler subclasses `EventGenerator`.
As a bare minimum, a custom implementation should provide the `__call__` method.

```python
class EventGenerator:
    """Base class for event samplers; subclasses provide ``__call__``."""

    def __init__(self, model, tokenizer, spacy_model, device):
        """Store the language model (moved to ``device``) and its tokenizer.

        Args:
            model: Language model; must support ``.to(device)``.
            tokenizer: Tokenizer matching ``model``. Its ``pad_token`` is set to
                its ``eos_token`` so that padding works.
            spacy_model: Accepted for subclasses; not used by the base class.
            device: Device the model is moved to.
        """
        self.model = model.to(device)
        self.tokenizer = tokenizer
        self.tokenizer.pad_token = self.tokenizer.eos_token
        self.device = device

    def __call__(self, prompt, max_length=30, **kwargs):
        """Generate events for ``prompt``; to be overridden by subclasses."""
        # A bare comment is not a valid function body, so an explicit `pass` is required.
        pass

Below is our wrapper for GPT-J that is used in our paper:


```python

class GPTJGenerator:
    """Thin wrapper that turns a causal language model into a text generator."""

    def __init__(self, model, tokenizer, device=None):
        """Store the model and tokenizer.

        Args:
            model: Language model exposing ``generate``.
            tokenizer: Tokenizer matching ``model``. Its ``pad_token`` is set to
                its ``eos_token``.
            device: Optional device; when given, the model is moved to it.
        """

        self.model = model
        
        if device is not None:
            self.model = self.model.to(device)

        self.tokenizer = tokenizer
        self.tokenizer.pad_token = self.tokenizer.eos_token
        self.device = device

    def __call__(self, prompt, **kwargs):
        """Generate continuations of ``prompt``.

        ``kwargs`` are forwarded unchanged to ``model.generate``.

        Returns:
            The decoded output of ``tokenizer.batch_decode`` for the generated ids.
        """
        # NOTE(review): the input ids are not moved to `self.device`; this looks like it
        # only works when the model stays on the tokenizer's default (CPU) device — confirm.
        output_id = self.model.generate(self.tokenizer(prompt, return_tensors="pt", padding=True).input_ids, **kwargs)
        return self.tokenizer.batch_decode(output_id)

```

In [14]:
from transformers import AutoTokenizer, AutoModelForCausalLM
# Load the tokenizer and causal language model from the `EleutherAI/gpt-j-6B` checkpoint.
# The model is not moved to another device here.
gptj_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
gptj_model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B")

In [23]:
# Wrap GPT-J as an event sampler; no `device` is passed, so the model stays where it was loaded.
gpt_generator = src.pipeline.GPTJGenerator(model=gptj_model, tokenizer=gptj_tokenizer)

In [26]:
# Sampling options forwarded to `model.generate` by the generator wrapper.
gen_kwargs = {
    "max_length": 30,
    "do_sample": True,
    "temperature": 0.9,
    "num_return_sequences": 10,
}

In [43]:
# Sample continuations of an example premise using the options in `gen_kwargs`.
gen_sents = gpt_generator("The man turned on the faucet.", **gen_kwargs)

In [44]:
# Show the sampled continuations, one per line.
print("\n".join(gen_sents))

The man turned on the faucet. His face was a mask of concentration, and his hands were steady as he washed the car.


The man turned on the faucet. It was a small plastic one, nothing fancy. His hands shook, but he managed to turn the water
The man turned on the faucet. He drank from it until the bottle was empty.

After a moment he pulled the cap off the
The man turned on the faucet. Water began splashing into the sink. He was washing the dishes, his expression blank, his eyes dead
The man turned on the faucet.

Bruno had to take a step back. The water made a sound. The man looked
The man turned on the faucet. The water gushed out. A loud splash, and water went everywhere, flowing down the man's body
The man turned on the faucet. He watched it fill with cold water. He heard it gurgle into the sink. He watched it
The man turned on the faucet. He scrubbed down the sink and the tub, washed the toilet bowl, then flushed. In the meantime
The man turned on the faucet. A strong current of wa

## Interventions

As a bare minimum, the intervention generator should
implement a `__call__` method that takes a prompt and additional
keyword arguments and returns a list of interventions.

```python
class InterventionGenerator:
    """Interface for intervention generators; subclasses implement ``__call__``."""

    def __init__(self, **kwargs):
        """Accept arbitrary configuration; the base class stores nothing."""
        pass

    def __call__(self, prompt, **kwargs):
        """Return the interventions generated for ``prompt`` (to be overridden)."""
        pass

```


Our intervention generator is built on PolyJuice, using the authors' released implementation [0].


[0] https://github.com/tongshuangwu/polyjuice

In [36]:
# Intervention generator; it receives the SRL wrapper (`allensrl`) created earlier as its SRL processor.
cf_gen = src.pipeline.PJGenerator(srl_processor=allensrl)


In [37]:
# Generate interventions for an example sentence, one list per control code.
# Every code needs its own trailing comma: adjacent string literals are
# concatenated by Python, so a missing comma silently merges two codes
# (e.g. "quantifier" "insert" -> "quantifierinsert").
cf_texts = cf_gen(
    "The man turned off the faucet.",
    ctrl_codes=[
        "resemantic",
        "negation",
        "lexical",
        "quantifier",
        "insert",
        "restructure",
        "shuffle",
        "delete",
    ],
)

In [40]:
# Show the interventions generated for the "resemantic" control code, one per line.
print("\n".join(cf_texts['resemantic']))

The woman turned off the faucet.
The man turned off the faucet.
The man poured water from the watering can into the pitcher until the watering can was empty. off the faucet.
The man replaced the bell brand off the faucet.
The man lit the cigarette off the faucet.
The man turned off the water main.
The man turned off the dishwasher.
The man turned off the fan and replaced it with a fan outside of his house.


## Processing Datasets for Evaluation

### Construct DataFrames for processing

In [45]:
def gen_copa_proc_df(copa):
    """Build the table of sentences to process from a COPA-style frame.

    For an ``'effect'`` question only the premise is listed; for any other
    question type both choices are listed instead.

    Args:
        copa: DataFrame with ``premise``, ``choice1``, ``choice2`` and
            ``question`` columns.

    Returns:
        DataFrame with columns ``index`` (the row label in ``copa``), ``name``
        (``'premise'``, ``'choice1'`` or ``'choice2'``) and ``text``.
    """
    records = []
    for row_label, item in copa.iterrows():
        if item['question'] == 'effect':
            fields = ('premise',)
        else:
            fields = ('choice1', 'choice2')
        for field in fields:
            records.append([row_label, field, item[field]])
    return pd.DataFrame(records, columns=['index', 'name', 'text'])

In [46]:
# Processing table for the COPA dev split: one row per sentence to be processed.
copa_proc = gen_copa_proc_df(copa_dev)

In [47]:
# Preview the first rows of the processing table.
copa_proc.head()

Unnamed: 0,index,name,text
0,0,premise,The man turned on the faucet.
1,1,premise,The girl found a bug in her cereal.
2,2,premise,The woman retired.
3,3,premise,I wanted to conserve energy.
4,4,choice1,The cook froze it.


We can apply the components row by row, but it is more efficient to let
each component batch process the data

#### Sample Covariates

In [51]:
def sample_cov(df, model, tokenizer, prompt_suffix=" Before that, ",
               temperature=0.9, max_length=30, num_return_sequences=100,
               **gen_kwargs):
    """Sample covariates (preceding events) for every sentence in ``df``.

    Each sentence is extended with ``prompt_suffix`` and the language model is
    asked to sample continuations of the resulting prompt.

    Args:
        df: DataFrame with a ``text`` column holding the sentences.
        model: Language model exposing ``generate``.
        tokenizer: Tokenizer matching ``model``; it is called with
            ``padding=True``, so a pad token must be configured.
        prompt_suffix: Text appended to each sentence to form the prompt.
        temperature: Sampling temperature.
        max_length: ``max_length`` passed to ``model.generate``.
        num_return_sequences: Number of samples drawn per sentence.
        **gen_kwargs: Extra options for ``model.generate``; they override the
            defaults above (including ``do_sample=True``).

    Returns:
        A list with one entry per row of ``df``; each entry is the list of
        decoded samples for that row.
    """
    options = dict(
        do_sample=True,
        temperature=temperature,
        max_length=max_length,
        num_return_sequences=num_return_sequences,
    )
    options.update(gen_kwargs)

    covariates = []
    for text in df['text']:
        prompt = f"{text}{prompt_suffix}"
        input_ids = tokenizer(prompt, return_tensors="pt", padding=True).input_ids
        generated = model.generate(input_ids, **options)
        covariates.append(tokenizer.batch_decode(generated))
    return covariates



In [53]:
# Sample covariates with GPT-J for every sentence in the processing table (one list of samples per row).
copa_proc['covariates'] = sample_cov(copa_proc, gptj_model, gptj_tokenizer)

#### Generating Interventions

In [54]:
def get_interventions(s, cf_gen, **kwargs):
    """Generate interventions for one sentence and flatten them into a list.

    This is a plain function, so it takes no ``self``: the generator is the
    ``cf_gen`` argument and nothing is cached on an instance.

    Args:
        s: Either the sentence itself (``str``) or a row/mapping with a
            ``'text'`` entry, as received from ``DataFrame.apply(..., axis=1)``.
        cf_gen: Intervention generator; called as ``cf_gen(text, gen_kwargs=...)``
            and expected to return a mapping from control code to a list of
            generated sentences.
        **kwargs: Options passed to ``cf_gen`` as its ``gen_kwargs`` argument.
            NOTE(review): elsewhere in this notebook ``ctrl_codes`` is given to
            the generator as a direct keyword — confirm it is also honoured
            when it arrives inside ``gen_kwargs``.

    Returns:
        A flat list with the interventions of all control codes, in the order
        the generator returned them.
    """
    text = s if isinstance(s, str) else s['text']
    interventions = cf_gen(text, gen_kwargs=kwargs)
    return list(itertools.chain.from_iterable(interventions.values()))

In [55]:
# Generate interventions for every row of the processing table.
# `axis=1` belongs to `DataFrame.apply` (row-wise application), not to the call
# inside the lambda, and every control code needs its own trailing comma:
# adjacent string literals are concatenated by Python.
copa_proc['interventions'] = copa_proc.apply(
    lambda s: get_interventions(
        s, cf_gen,
        ctrl_codes=[
            "resemantic",
            "negation",
            "lexical",
            "quantifier",
            "insert",
            "restructure",
            "shuffle",
            "delete",
        ],
    ),
    axis=1,
)

### Obtain Temporal Probabilities

In [57]:
# use `utils.glt_get_probs`
# if working on glucose-d1
copa_proc = copa_proc.apply(lambda s : utils.copa_get_probs(s, model=tp_roberta_ft, 
                                                            top_k=5, spacy_model=spacy_model), 
                            axis=1)



#### Add a few columns

In [60]:
def postproc_copa(df, copa=None):
    """Attach ``label_idx`` and ``outcome`` columns to a COPA processing table.

    * A ``choice1``/``choice2`` row gets ``label_idx`` 0/1 and the premise of
      its COPA item as ``outcome``.
    * A ``premise`` row is expanded into two rows, ``label_idx`` 0 and 1, whose
      ``outcome`` is ``choice1`` and ``choice2`` of its COPA item respectively.

    The input frame is not modified.

    Args:
        df: Processing table with ``index`` (row label of the COPA item) and
            ``name`` (``'premise'``, ``'choice1'`` or ``'choice2'``) columns.
        copa: COPA frame the table was built from, indexed by the labels stored
            in ``df['index']``. Defaults to the notebook-level ``copa_dev``,
            the split the processing table is generated from.

    Returns:
        A new DataFrame: all rows of ``df`` (premise rows labelled 0) followed
        by the copies of the premise rows labelled 1. Index labels of ``df``
        are kept, so the premise copies share labels with their originals.
    """
    source = copa_dev if copa is None else copa

    labelled = df.copy()
    labelled['label_idx'] = labelled['name'].apply(
        lambda name: -1 if name == 'premise' else int(name[-1]) - 1
    )
    is_premise = labelled['label_idx'] == -1

    # One copy of every premise row per candidate outcome (choice1 / choice2).
    second_choice = labelled[is_premise].copy()
    second_choice['label_idx'] = 1
    labelled.loc[is_premise, 'label_idx'] = 0
    result = pd.concat([labelled, second_choice])

    # `index` holds row labels of `source`, hence label-based `.loc` lookups.
    result['outcome'] = [
        source.loc[item, f'choice{label + 1}'] if name == 'premise'
        else source.loc[item, 'premise']
        for item, name, label in zip(result['index'], result['name'], result['label_idx'])
    ]
    return result

In [63]:
# Post-process the COPA table built above (`df` is not defined at notebook level).
copa_proc = postproc_copa(copa_proc)

#### Save Data

In [64]:
# Preview the fully processed table before writing it to disk.
copa_proc.head()

Unnamed: 0,index,name,text,covariates,interventions,outcome,label_idx,p_xd,p_dy,p_xy
0,0,premise,The man turned on the faucet.,"[""The man turned on the faucet. Before that, h...","['The man chose to turned on the faucet.', 'At...",The toilet filled with water.,0,"[[(0.210379958152771, 0.30398909747600555, 0.0...","[(0.45566828548908234, 0.4977114349603653), (0...","[(0.04539947956800461, 0.03326283395290375), (..."
1,0,premise,The man turned on the faucet.,"[""The man turned on the faucet. Before that, h...","['The man chose to turned on the faucet.', 'At...",Water flowed from the spout.,1,"[[(0.210379958152771, 0.30398909747600555, 0.0...","[(0.5247508734464645, 0.4717450588941574), (0....","[(0.3355148509144783, 0.25840914994478226), (0..."
2,1,premise,The girl found a bug in her cereal.,"[""The girl found a bug in her cereal. Before t...",['While digging for well water Monica stayed o...,She poured milk in the bowl.,0,"[[(0.5899830460548401, 0.39629100263118744, 0....","[(0.47232694923877716, 0.5270598828792572), (0...","[(0.5464454889297485, 0.40834908187389374), (0..."
3,1,premise,The girl found a bug in her cereal.,"[""The girl found a bug in her cereal. Before t...",['While digging for well water Monica stayed o...,She lost her appetite.,1,"[[(0.5899830460548401, 0.39629100263118744, 0....","[(0.4848470687866211, 0.5141351819038391), (0....","[(0.6019861996173859, 0.38734038174152374), (0..."
4,2,premise,The woman retired.,"[""The woman retired. Before that, she'd writte...","['Suddenly the woman retired.', 'The author ra...",She received her pension.,0,"[[(0.3462740257382393, 0.30608220398426056, 0....","[(0.4986240118741989, 0.49911610782146454), (0...","[(0.5325719714164734, 0.45344893634319305), (0..."


In [None]:
# Write the processed table to DATA_PATH/"copa_dev_probs.csv".
copa_proc.to_csv(DATA_PATH/"copa_dev_probs.csv")

## Next Steps

Please see the `result_presentation.ipynb` notebook for evaluation.