In [101]:
# import gymnasium as gym
import math
import random
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple, deque
from itertools import count

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import tqdm
import copy
from imp import reload
import rl_dqn
import environment
import embedding

from rl_dqn import ReplayMemory, DQN, Transition
from embedding import EmbeddingModel
from environment import OriginAgent, DialougeEnv
from transformers import pipeline
from torch.utils.tensorboard import SummaryWriter


In [102]:
reload(rl_dqn)
reload(environment)
reload(embedding)

from rl_dqn import ReplayMemory, DQN, Transition
from embedding import EmbeddingModel
from environment import OriginAgent, DialougeEnv

In [3]:
embedding_model = EmbeddingModel()

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing XLMRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [4]:
unmasker = pipeline('fill-mask', model='xlm-roberta-base')

In [118]:
reload(rl_dqn)
reload(environment)
reload(embedding)


<module 'embedding' from '/home/xu_zhang01/Thesis/RL/embedding.py'>

In [5]:
reload(environment)
test_agent = environment.OriginAgent('../data/swords/swords-v1.1_dev.json.gz')
train_agent = environment.OriginAgent('../data/swords/swords-v1.1_test.json.gz')


In [105]:
class RLModel:
    
    def __init__(self,env,device):
        self.BATCH_SIZE = 128
        self.TAU = 0.005
        self.LR = 1e-4
        self.GAMMA = 0.99
        # Get number of actions from gym action space
        n_actions = env.action_space.n
        # Get the number of state observations
        state, info = env.reset()
        n_observations = len(state)

        self.policy_net = rl_dqn.DQN(n_observations, n_actions).to(device)
        self.target_net = rl_dqn.DQN(n_observations, n_actions).to(device)
        self.target_net.load_state_dict(self.policy_net.state_dict())
        self.optimizer = optim.AdamW(self.policy_net.parameters(), lr=self.LR, amsgrad=True)
        self.memory = ReplayMemory(10000)
        self.env = env
        self.device = device
        self.steps_done = 0
        
    def caculate_threshold(self):
        EPS_START = 0.9
        EPS_END = 0.05
        EPS_DECAY = 10000

        eps_threshold = EPS_END + (EPS_START - EPS_END) * \
                        math.exp(-1. * self.steps_done / EPS_DECAY)
        return eps_threshold
        
        
    def select_action(self, state,eps_threshold=None):
        # global steps_done
        sample = random.random()
        eps_threshold = self.caculate_threshold() if  eps_threshold is None else 0
        if sample >= eps_threshold:
            with torch.no_grad():
                # t.max(1) will return the largest column value of each row.
                # second column on max result is index of where max element was
                # found, so we pick action with the larger expected reward.
                return self.policy_net(state).max(1)[1].view(1, 1), eps_threshold
        else:
            return torch.tensor([[self.env.action_space.sample()]], device=device, dtype=torch.long), eps_threshold


    def optimize_model(self):
        if len(self.memory) < self.BATCH_SIZE:
            return
        transitions = self.memory.sample(self.BATCH_SIZE)
        # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for
        # detailed explanation). This converts batch-array of Transitions
        # to Transition of batch-arrays.
        batch = Transition(*zip(*transitions))

        # Compute a mask of non-final states and concatenate the batch elements
        # (a final state would've been the one after which simulation ended)
        non_final_mask = torch.tensor(tuple(map(lambda s: s is not None,
                                              batch.next_state)), device=self.device, dtype=torch.bool)
        non_final_next_states = torch.cat([s for s in batch.next_state
                                                    if s is not None])
        state_batch = torch.cat(batch.state)
        action_batch = torch.cat(batch.action)
        reward_batch = torch.cat(batch.reward)

        # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
        # columns of actions taken. These are the actions which would've been taken
        # for each batch state according to policy_net
        state_action_values = self.policy_net(state_batch).gather(1, action_batch)

        # Compute V(s_{t+1}) for all next states.
        # Expected values of actions for non_final_next_states are computed based
        # on the "older" target_net; selecting their best reward with max(1)[0].
        # This is merged based on the mask, such that we'll have either the expected
        # state value or 0 in case the state was final.
        next_state_values = torch.zeros(self.BATCH_SIZE, device=self.device)
        with torch.no_grad():
            next_state_values[non_final_mask] = self.target_net(non_final_next_states).max(1)[0]
        # Compute the expected Q values
        expected_state_action_values = (next_state_values * self.GAMMA) + reward_batch

        # Compute Huber loss
        criterion = nn.SmoothL1Loss()
        loss = criterion(state_action_values, expected_state_action_values.unsqueeze(1))

        # Optimize the model
        self.optimizer.zero_grad()
        loss.backward()
        # In-place gradient clipping
        torch.nn.utils.clip_grad_value_(self.policy_net.parameters(), 100)
        self.optimizer.step()

    def train(self, num_episodes=3000,start_episodes=0):
        
        return_list = []
        episodes_list = []
        
        for i_episode in tqdm.tqdm(range(start_episodes,start_episodes+num_episodes),desc='RL Training'):
            # Initialize the environment and get it's state
            returns = 0
            state, info = self.env.reset()
            state = torch.tensor(state, dtype=torch.float32, device=self.device).unsqueeze(0)
            
            for t in count():
                action, eps_threshold = self.select_action(state)
                writer.add_scalar('eps_threshold', eps_threshold, self.steps_done)
                self.steps_done += 1

                observation, reward, terminated, truncated, _ = self.env.step(action.item())
                reward = torch.tensor([reward], device=device)
                returns += reward
                done = terminated or truncated
                if terminated:
                    next_state = None
                else:
                    next_state = torch.tensor(observation, dtype=torch.float32, device=self.device).unsqueeze(0)
                # Store the transition in memory
                self.memory.push(state, action, next_state, reward)
                # Move to the next state
                state = next_state
                # Perform one step of the optimization (on the policy network)
                self.optimize_model()
                # Soft update of the target network's weights
                # θ′ ← τ θ + (1 −τ )θ′
                target_net_state_dict = self.target_net.state_dict()
                policy_net_state_dict = self.policy_net.state_dict()
                for key in policy_net_state_dict:
                    target_net_state_dict[key] = policy_net_state_dict[key]*self.TAU + target_net_state_dict[key]*(1-self.TAU)
                #self.target_net.load_state_dict(target_net_state_dict)
                if done:
                    return_list.append(returns)
                    episodes_list.append(copy.copy(self.env.history))
                    # plot_durations()
                    break
            
            writer.add_scalar('train/returns_episode', returns, i_episode)
            if i_episode%500==0:
                test_episodes_list, Rewards, accurate_match_rate, loose_match_rate = self.evaluate(test_env,eva_tag='eva test:')
                writer.add_scalar('test/Rewards_all', Rewards, self.steps_done)
                writer.add_scalar('test/accurate_match_rate', accurate_match_rate, i_episode)
                writer.add_scalar('test/loose_match_rate', loose_match_rate, i_episode)
                train_episodes_list, Rewards, accurate_match_rate, loose_match_rate = self.evaluate(train_env,eva_tag='eva train:')
                writer.add_scalar('train/Rewards_all', Rewards, self.steps_done)
                writer.add_scalar('train/accurate_match_rate', accurate_match_rate, i_episode)
                writer.add_scalar('train/loose_match_rate', loose_match_rate, i_episode)
        return test_episodes_list, train_episodes_list
    
    def evaluate(self,eva_env,size=None,eva_tag=''):
        Rewards = 0
        episodes_list = []
        if size is None:
            size = len(eva_env.OA.context_ids)
        for context_id in tqdm.tqdm(eva_env.OA.context_ids[:size],desc=eva_tag,mininterval=3):
            state, info = eva_env.reset(context_id)
            state = torch.tensor(state, dtype=torch.float32, device=self.device).unsqueeze(0)
            for t in count():
                action, _ = self.select_action(state,eps_threshold=0)
                observation, reward, terminated, truncated, _ = eva_env.step(action.item())
                done = terminated or truncated
                if terminated:
                    next_state = None
                else:
                    next_state = torch.tensor(observation, dtype=torch.float32, device=self.device).unsqueeze(0)
                # Move to the next state
                state = next_state
                Rewards += reward
                if done:
                    episodes_list.append(copy.copy(eva_env.history))
                    # plot_durations()
                    break
        
        accurate_match = []
        loose_match = []
        
        for eps in episodes_list:
            for i,utter in enumerate(eps):
                if 'is_right_action' in utter:
                    accurate_match.append(utter['is_right_action'])
                    loose_match.append(eps[i-1]['action'] in utter['loose_right_actions'])
        accurate_match_rate = np.array(accurate_match).sum() / len(accurate_match)
        loose_match_rate = np.array(loose_match).sum() / len(loose_match)
        print(Rewards, accurate_match_rate, loose_match_rate)
        return episodes_list, Rewards, accurate_match_rate, loose_match_rate


In [113]:
reload(environment)
test_agent = environment.OriginAgent('../data/swords/swords-v1.1_dev.json.gz')
train_agent = environment.OriginAgent('../data/swords/swords-v1.1_test.json.gz')

test_env = environment.DialougeEnv(test_agent,embedding_model,unmasker)
train_env = environment.DialougeEnv(train_agent,embedding_model,unmasker)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [114]:
writer = SummaryWriter('rl_log_lemma_words_5num')
rl_model = RLModel(train_env,device)

In [115]:
test_episodes_list, train_episodes_list = rl_model.train(3000,0)

RL Training:   0%|                                                                     | 0/3000 [00:00<?, ?it/s]
eva test::   0%|                                                                        | 0/370 [00:00<?, ?it/s][A
eva test::  30%|██████████████████▊                                           | 112/370 [00:03<00:07, 35.34it/s][A
eva test::  59%|████████████████████████████████████▋                         | 219/370 [00:06<00:04, 31.15it/s][A
eva test:: 100%|██████████████████████████████████████████████████████████████| 370/370 [00:11<00:00, 32.86it/s][A


-1481.4999999999743 0.06827794561933535 0.8368580060422961



eva train::   0%|                                                                       | 0/762 [00:00<?, ?it/s][A
eva train::  20%|████████████▏                                                | 153/762 [00:03<00:12, 50.71it/s][A
eva train::  40%|████████████████████████▍                                    | 306/762 [00:07<00:11, 38.11it/s][A
eva train::  56%|██████████████████████████████████                           | 426/762 [00:10<00:08, 38.24it/s][A
eva train::  71%|███████████████████████████████████████████▌                 | 544/762 [00:14<00:06, 36.06it/s][A
eva train:: 100%|█████████████████████████████████████████████████████████████| 762/762 [00:19<00:00, 39.95it/s][A
RL Training:   0%|                                                           | 5/3000 [00:30<3:50:25,  4.62s/it]

-3031.699999999815 0.0691786870768325 0.8110097144539299


RL Training:  17%|█████████▊                                                 | 499/3000 [01:31<05:37,  7.41it/s]
eva test:: 100%|████████████████████████████████████████████████████████████| 370/370 [00:00<00:00, 4346.45it/s][A


352.9999999999995 0.6108108108108108 0.6108108108108108



eva train:: 100%|████████████████████████████████████████████████████████████| 762/762 [00:01<00:00, 435.14it/s][A
RL Training:  17%|█████████▉                                                 | 504/3000 [01:33<09:57,  4.18it/s]

624.7999999999922 0.583989501312336 0.583989501312336


RL Training:  33%|███████████████████▋                                       | 999/3000 [02:16<03:50,  8.68it/s]
eva test:: 100%|████████████████████████████████████████████████████████████| 370/370 [00:00<00:00, 4379.35it/s][A


352.9999999999995 0.6108108108108108 0.6108108108108108



eva train:: 100%|███████████████████████████████████████████████████████████| 762/762 [00:00<00:00, 4420.64it/s][A
RL Training:  33%|███████████████████▎                                      | 1001/3000 [02:17<04:52,  6.83it/s]

624.7999999999922 0.583989501312336 0.583989501312336


RL Training:  50%|████████████████████████████▉                             | 1498/3000 [03:08<02:38,  9.46it/s]
eva test:: 100%|████████████████████████████████████████████████████████████| 370/370 [00:00<00:00, 4176.17it/s][A


352.9999999999995 0.6108108108108108 0.6108108108108108



eva train:: 100%|███████████████████████████████████████████████████████████| 762/762 [00:00<00:00, 4823.81it/s][A
RL Training:  50%|█████████████████████████████                             | 1501/3000 [03:08<02:32,  9.85it/s]

624.7999999999922 0.583989501312336 0.583989501312336


RL Training:  67%|██████████████████████████████████████▋                   | 1999/3000 [03:46<01:01, 16.21it/s]
eva test::   0%|                                                                        | 0/370 [00:00<?, ?it/s][A
eva test:: 100%|██████████████████████████████████████████████████████████████| 370/370 [00:04<00:00, 83.83it/s][A


382.9999999999987 0.6128205128205129 0.6512820512820513



eva train:: 100%|████████████████████████████████████████████████████████████| 762/762 [00:01<00:00, 493.53it/s][A
RL Training:  67%|██████████████████████████████████████▋                   | 2001/3000 [03:52<11:08,  1.49it/s]

646.1999999999917 0.5885416666666666 0.59375


RL Training:  83%|████████████████████████████████████████████████▏         | 2494/3000 [04:26<00:35, 14.42it/s]
eva test::   0%|                                                                        | 0/370 [00:00<?, ?it/s][A
eva test::  23%|██████████████▎                                                | 84/370 [00:03<00:10, 26.73it/s][A
eva test::  45%|███████████████████████████▋                                  | 165/370 [00:06<00:08, 24.24it/s][A
eva test::  68%|██████████████████████████████████████████▏                   | 252/370 [00:09<00:04, 25.91it/s][A
eva test:: 100%|██████████████████████████████████████████████████████████████| 370/370 [00:16<00:00, 23.11it/s][A


371.7999999999984 0.5727069351230425 0.6868008948545862



RL Training:  83%|████████████████████████████████████████████████▏         | 2494/3000 [04:44<00:35, 14.42it/s][A
eva train::  23%|██████████████                                               | 176/762 [00:03<00:12, 47.12it/s][A
eva train::  42%|█████████████████████████▍                                   | 318/762 [00:07<00:10, 43.55it/s][A
eva train::  59%|███████████████████████████████████▉                         | 449/762 [00:10<00:07, 42.09it/s][A
eva train:: 100%|█████████████████████████████████████████████████████████████| 762/762 [00:16<00:00, 47.18it/s][A
RL Training:  83%|████████████████████████████████████████████████▎         | 2501/3000 [04:58<18:16,  2.20s/it]

703.3999999999899 0.5922897196261683 0.655373831775701


RL Training: 100%|██████████████████████████████████████████████████████████| 3000/3000 [05:33<00:00,  9.01it/s]


In [112]:
state,info = train_env.reset()
train_env.history

[{'text': 'He began with small animals\n                    and moved on to derelicts and Undersiders, people who would never be missed. Now\n                    he had thought to use vacationers like Tasha and me, and when someone came\n                    looking, he would say we had gone island-hopping in our windboat. Our boat would\n                    disappear into the ocean to be found or not as the wind and tides chose.',
  'target': 'came',
  'lemma_target': 'came',
  'substitutes': [('arrive', 0.7),
   ('appear', 0.6),
   ('begin', 0.4),
   ('approach', 0.3),
   ('enter', 0.3)],
  'lemma_subs': [('arrive', 0.7),
   ('appear', 0.6),
   ('begin', 0.4),
   ('approach', 0.3),
   ('enter', 0.3)],
  'offset': 219,
  'role': 'user',
  'option_words': [('exit', 0.39),
   ('enter', 0.167),
   ('leave', 0.043),
   ('end', 0.03),
   ('go', 0.021),
   ('escape', 0.017),
   ('close', 0.016),
   ('continue', 0.012),
   ('stay', 0.011),
   ('stop', 0.01)],
  'mask_text': 'He began with sma

In [18]:
episodes_list, Rewards, accurate_match_rate, loose_match_rate = rl_model.evaluate(env)
Rewards, accurate_match_rate, loose_match_rate

100%|██████████| 370/370 [01:39<00:00,  3.72it/s]


(-149.99999999999855, 0.2938775510204082, 0.3489795918367347)

In [22]:
import time

In [24]:
def foo():
    for i in tqdm.tqdm(range(100),desc='get nothing',mininterval=3):
        i +1
        time.sleep(1)
    

In [215]:
filter_words = '''</s>
.
..
?
s
''
!”
.”
”
nos
mr
ve
'''.split()
filter_words

['</s>', '.', '..', '?', 's', "''", '!”', '.”', '”', 'nos', 'mr', 've']

In [217]:
for context_id in test_env.OA.context_ids[:50]:
    context_id = random.choice(test_env.OA.context_ids)
    option_words,prompt_sentence = test_env.get_option_words_by_llm(context_id=context_id,use_cache=True)
    state,info = test_env.reset(context_id)
    subs = test_env.history[0]['substitutes']
    words = [s for s,score in option_words]
    if not set(words) & set(filter_words):
        print(words,'      ',[s for s,score in subs[:8]])
# print('--------------------------------------------------------------------------')
# print(subs[:5])

['said', 'explained', 'reveal', 'announced', 'say']        ['state', 'report', 'declare', 'reveal', 'announce', 'disclose', 'claim', 'note']
['crawl', 'slide', 'move', 'climb', 'jump']        ['wriggle', 'slide', 'crawl', 'scoot', 'move', 'glide', 'slip', 'coast']
['reasonable', 'sensible', 'rational', 'clear', 'coherent']        ['rational', 'justifiable', 'sensible', 'reasonable', 'clear', 'relevant', 'plausible', 'legit']
['producer', 'director', 'manager', 'critic', 'actor']        ['maker', 'producer', 'supervisor', 'controller', 'exec', 'executive', 'key player', 'overseer']
['buy', 'get', 'pay', 'find', 'give']        ['secure', 'attain', 'gain', 'procure', 'obtain', 'acquire', 'get hands on', 'be given']
['strong', 'powerful', 'big', 'huge', 'certain']        ['big', 'sturdy', 'certain', 'well-established', 'powerful', 'secure', 'firm', 'substantial']
['happened', 'happen', 'occur', 'come', 'happens']        ['happen', 'occur', 'come about', 'transpire', 'materialize', 'occure'

In [220]:
unmasker(prompt_sentence,top_k=10)

[{'score': 0.3038056790828705,
  'token': 43334,
  'token_str': 'dark',
  'sequence': 'Her candy lips twisted into a frown.The dark glow grew stronger and larger, filling the air for about a foot around her hands..The pitchblack glow grew stronger and larger, filling the air for about a foot around her hands..The shady glow grew stronger and larger, filling the air for about a foot around her hands..The gloomy glow grew stronger and larger, filling the air for about a foot around her hands..The shadowy glow grew stronger and larger, filling the air for about a foot around her hands..The lightless glow grew stronger and larger, filling the air for about a foot around her hands..The dark glow grew stronger and larger, filling the air for about a foot around her hands.In her palms, the droplets of blood swelled into small pools.'},
 {'score': 0.08765923231840134,
  'token': 22556,
  'token_str': 'black',
  'sequence': 'Her candy lips twisted into a frown.The dark glow grew stronger and la

In [224]:
import nltk
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to
[nltk_data]     /home/xu_zhang01/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [229]:
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()
sentence = "The cats were chasing mice"
words = sentence.split()

lemmatized_words = [lemmatizer.lemmatize(word) for word in words]
lemmatized_sentence = " ".join(lemmatized_words)

print(lemmatized_sentence)


The cat were chasing mouse


In [228]:
# import these modules
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()

print("rocks :", lemmatizer.lemmatize("rocks"))
print("corpora :", lemmatizer.lemmatize("corpora"))

# a denotes adjective in "pos"
print("better :", lemmatizer.lemmatize("better", pos ="a"))


rocks : rock
corpora : corpus
better : good


In [227]:
nltk.download('omw-1.4')


[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /home/xu_zhang01/nltk_data...


True

In [155]:
state,info = test_env.reset(context_id)
test_env.history[0]['substitutes'][:10]

[('pull', 0.4),
 ('bring', 0.4),
 ('have', 0.3),
 ('secure', 0.3),
 ('draw', 0.3),
 ('capture', 0.3),
 ('hustle', 0.3),
 ('acquire', 0.3),
 ('win', 0.3),
 ('defeat', 0.3)]

In [None]:
right_action    

In [113]:
right_action_match = []
for eps in eval_episode_list:
    for i,utter in enumerate(eps):
        if 'right_action' in utter:
            right_action_match.append(utter['right_action'][0] == eps[i-1]['action'])
cnt_right = np.array(right_action_match).sum()
len_right = len(right_action_match)
print(cnt_right,len_right,cnt_right/len_right)

175 580 0.3017241379310345


In [49]:
eval_episode_list[0]

NameError: name 'eval_episode_list' is not defined

In [None]:
right_action_matcht3_episodes_list

In [None]:
[eval_episode_list

In [139]:
import pandas as pd

In [159]:
import  random

In [133]:
keys = ['role','text','target','option_words','reward','is_right_action','action','loose_right_actions']
t3_episodes_list = [epi for epi in test_episodes_list if len(epi)>3]

episode = random.choice(t3_episodes_list)
reward_all = 0
for utter in episode:
    role = utter['role']
    for k in keys:
        if k in utter:
            if k=='reward':
                reward_all += utter[k]
            if role=='user':
                print(k.ljust(15,' '),':',utter[k])
            else:
                print(k.rjust(35,' '),':',utter[k])
    print("---------------------------------------------------")
print("total reward:  ", reward_all)

role            : user
text            : “I …I noticed him from across the room when I was looking around for Rachel. He was standing right in front of some blue lights, so I couldn’t see him very well, but I noticed the way that the light set off his pale skin. It almost looked like the light bent around him without actually touching him directly.
target          : very
option_words    : [('very', 0.275), ('really', 0.258), ('quite', 0.196), ('particularly', 0.135), ('extremely', 0.047), ('rather', 0.023), ('incredibly', 0.007), ('fairly', 0.007), ('especially', 0.006), ('truly', 0.004)]
---------------------------------------------------
                               role : bot
                               text : The word very is not clear to me. Do you mean something like very,really,quite,particularly,extremely,rather,incredibly,fairly,especially,truly ?
                       option_words : [('very', 0.275), ('really', 0.258), ('quite', 0.196), ('particularly', 0.135), ('extrem

In [119]:
len(t3_episodes_list)

60

In [500]:
test_env.history

[{'text': "FAMILY PETS are improving recovery rates of patients at Columbia Hospital, Milwaukee. Patients who receive canine or feline visitors are found to have lower blood pressure and improved appetite and be more receptive to therapy, says Mary Ann O'Loughlin, program coordinator.  \n\nTIRED OF TRIMMING?",
  'target': 'receptive',
  'substitutes': [('acceptant', 0.7),
   ('welcoming', 0.7),
   ('approachable', 0.6),
   ('acceptive', 0.5),
   ('open', 0.5),
   ('responsive', 0.4),
   ('open to suggestions', 0.3),
   ('favorable', 0.3),
   ('open-minded', 0.3),
   ('open to new ideas', 0.3),
   ('persuadable', 0.2),
   ('suggestible', 0.2),
   ('hospitable', 0.2),
   ('influenceable', 0.1),
   ('accessible', 0.1),
   ('friendly', 0.1),
   ('amenable', 0.1),
   ('ready', 0.1),
   ('quick on the uptake', 0.0),
   ('sympathetic', 0.0),
   ('sensitive', 0.0),
   ('bright', 0.0),
   ('perceptive', 0.0),
   ('susceptible', 0.0),
   ('swayable', 0.0),
   ('pushover', 0.0),
   ('well-dispose

In [502]:
test_env.target

'receptive'

In [134]:

print(episodes_list[0][0]['target'])

print(episodes_list[0][0]['substitutes'][:5])
print("--------------------")
print(episodes_list[0][0]['mask_text'])

print(episodes_list[0][0]['option_words'])

NameError: name 'episodes_list' is not defined

In [517]:
env.mask_model.encode

<transformers.pipelines.fill_mask.FillMaskPipeline at 0x7f067b69b760>

In [522]:
env.mask_model('''It was not an attractive face right now; her ebony
eyes shadowed by hours on the watch, full lips pursed with frustration. She had
the look of every leader she had ever known. She had
the look of every leader she had ever be acquaint with. She had
the look of every leader she had ever know. She had 
the look of every leader she had ever be acquainted with. She had
the look of every leader she had ever have knowledge of. She had
the look of every leader she had ever experience. She had
the look of every leader she had ever <mask>. At six foot two, she stood a full
head taller than even her Arrallin first officer.''')

[{'score': 0.3854154944419861,
  'token': 3714,
  'token_str': 'know',
  'sequence': 'It was not an attractive face right now; her ebony eyes shadowed by hours on the watch, full lips pursed with frustration. She had the look of every leader she had ever known. She had the look of every leader she had ever be acquaint with. She had the look of every leader she had ever know. She had the look of every leader she had ever be acquainted with. She had the look of every leader she had ever have knowledge of. She had the look of every leader she had ever experience. She had the look of every leader she had ever know. At six foot two, she stood a full head taller than even her Arrallin first officer.'},
 {'score': 0.15135060250759125,
  'token': 51529,
  'token_str': 'known',
  'sequence': 'It was not an attractive face right now; her ebony eyes shadowed by hours on the watch, full lips pursed with frustration. She had the look of every leader she had ever known. She had the look of every lea

In [520]:
env.mask_model(episodes_list[0][0]['mask_text'])

[{'score': 0.2613705098628998,
  'token': 3714,
  'token_str': 'know',
  'sequence': 'It was not an attractive face right now; her ebony eyes shadowed by hours on the watch, full lips pursed with frustration.She had the look of every leader she had ever known..She had the look of every leader she had ever be acquaint with..She had the look of every leader she had ever know..She had the look of every leader she had ever be acquainted with..She had the look of every leader she had ever have knowledge of..She had the look of every leader she had ever experience..She had the look of every leader she had ever know.At six foot two, she stood a full head taller than even her Arrallin first officer.'},
 {'score': 0.1795056164264679,
  'token': 51529,
  'token_str': 'known',
  'sequence': 'It was not an attractive face right now; her ebony eyes shadowed by hours on the watch, full lips pursed with frustration.She had the look of every leader she had ever known..She had the look of every leader 

In [None]:
'\n'.

In [57]:
import numpy as np
np.argwhere([True,False,True,False]).reshape(-1)

array([0, 2])

In [19]:
eval_episode_list[1]

[{'text': 'Nepthys turned to me. “Well, kid, what do you think? Remember, this is your quest.',
  'target': 'think',
  'substitutes': [('have in mind', 0.9),
   ('suppose', 0.8),
   ('presume', 0.6),
   ('reckon', 0.6),
   ('conclude', 0.5),
   ('speculate', 0.5),
   ('suspect', 0.5),
   ('feel', 0.4),
   ('imagine', 0.4),
   ('suggest', 0.4),
   ('believe', 0.4),
   ('anticipate', 0.3),
   ('reflect', 0.3),
   ('surmise', 0.3),
   ('gather', 0.3),
   ('understand', 0.3),
   ('foresee', 0.3),
   ('deem', 0.3),
   ('determine', 0.2),
   ('expect', 0.2),
   ('fancy', 0.2),
   ('ponder', 0.2),
   ('consider', 0.2),
   ('reason', 0.2),
   ('see', 0.2),
   ('stop to consider', 0.2),
   ('guess', 0.2),
   ('intellectualize', 0.2),
   ('envision', 0.2),
   ('want', 0.2),
   ('use ones head', 0.2),
   ('estimate', 0.1),
   ('appraise', 0.1),
   ('sort out', 0.1),
   ('contemplate', 0.1),
   ('assume', 0.1),
   ('discern', 0.1),
   ('conceive', 0.1),
   ('stew', 0.1),
   ('project', 0.1),
   ('

In [97]:
episodes_list[-1][1]

{'text': 'The word payable is not clear to me. Do you mean something like due,paid,pay,payment,available ?',
 'option_words': [('due', 0.81),
  ('paid', 0.135),
  ('pay', 0.009),
  ('payment', 0.003),
  ('available', 0.002)],
 'action': 2,
 'role': 'bot'}

In [32]:
from enum import Enum

# 定义一个枚举类型
class Action(Enum):
    NO_ACTION = 0
    CONFIRM = 1
    OPTION = 2
    EXPLAIN = 3


In [38]:
for action in Action:
    print(action.value)

0
1
2
3


In [None]:
pd.set_option('display.max_colwidth', 20)

In [122]:
import pandas as pd
from collections import Counter
Counter(pd.DataFrame([e[2] for e in episodes_list[-100:]])['is_right_action'].values)

Counter({True: 58, False: 42})

In [125]:
print('hello'.rjust(10, ' '))
# 输出 '# 输出 'hello   

     hello


In [126]:
episodes_list[0]

[{'text': 'Months earlier, we might have sought a bed, a couch, or a\n                    comfortable chair at this point. Instead, I asked, "Is he handsome?" \n                 "You\'re jealous."',
  'target': 'Instead',
  'substitutes': [('alternatively', 0.6),
   ('however', 0.6),
   ('alternately', 0.5),
   ('rather', 0.4),
   ('on second thought', 0.4),
   ('in lieu', 0.3),
   ('as a substitute', 0.3),
   ('alternative', 0.3),
   ('in place of', 0.3),
   ('on behalf of', 0.3),
   ('rather than', 0.2),
   ('preferably', 0.0),
   ('in preference', 0.0),
   ('quietly', 0.0),
   ('actually', 0.0)],
  'offset': 111,
  'role': 'user'},
 {'text': 'The word Instead is not clear to me. Do you mean something like how,</s>,then,and,so ?',
  'option_words': [('how', 0.254),
   ('</s>', 0.131),
   ('then', 0.025),
   ('and', 0.022),
   ('so', 0.021)],
  'action': 2,
  'role': 'bot'},
 {'text': 'none of these',
  'reward': -1,
  'is_right_action': False,
  'role': 'user'},
 {'text': 'The word I

In [105]:
turn = 0
for his in env.history:
    space_num = 0 if turn%2==0 else (135-len(his['text']))
    # print(turn, space_num) 
    print(' '*space_num + his['text'])
    print('----------------------------------')
    turn += 1

Electronic theft by foreign and industrial spies and disgruntled
				employees is costing U.S. companies billions and eroding their
				international competitive advantage. That was the message delivered by
				government and private security experts at an all-day conference on
				corporate electronic espionage. "Hostile and even friendly nations
				routinely steal information from U.S. companies and share it with their
				own companies," said Noel D. Matchett, a former staffer at the federal
				National Security Agency and now president of Information Security Inc.,
				Silver Spring, Md.
----------------------------------
                   The word government is not clear to me. Do you mean something like government,state,authority,official,authorities ?
----------------------------------
authority
----------------------------------
                   The word government is not clear to me. Do you mean something like government,state,authority,official,authorities ?
------------

In [107]:
import copy

In [None]:
copy.copy()

In [127]:
env.reward_table

{0: {True: {'answer': '', 'reward': 2, 'terminated': True},
  False: {'answer': ' you misunderstdood my words, I mean...',
   'reward': -2,
   'terminated': True}},
 1: {True: {'answer': 'Yes, it is', 'reward': 1.5, 'terminated': True},
  False: {'answer': 'No, it is not ', 'reward': -1.5, 'terminated': False}},
 2: {True: {'answer': None, 'reward': 1, 'terminated': True},
  False: {'answer': ' none of these', 'reward': -1, 'terminated': False}},
 3: {True: {'answer': 'the explain content',
   'reward': 0.5,
   'terminated': False},
  False: {'answer': 'it is obviously, but I will try explain it too',
   'reward': -0.5,
   'terminated': True}}}

In [96]:
def get_best_action(self):
    option_words = self.get_option_words_by_llm(self.context_id)
    should_no_action = self.should_no_action(option_words)
    should_confirm = self.should_confirm(option_words)
    should_opt = self.should_opt(option_words)
    should_explain = self.should_explain(option_words)

    right_action = [should_no_action, should_confirm, should_opt, should_explain]
    return right_action.index(

In [102]:
get_best_action(env).index(False)

1

In [74]:
option = env.get_option_words_by_llm(context_id)
env.get_best_action()

array([2])

## Expierment

In [4]:
state,info =  env.reset()

In [18]:
env.history[0]

question(text='I imagine that people do not come here for unimportant reasons.”\n            It hissed thoughtfully. “True, true.', target='hissed', substitutes=[('seethe', 0.4), ('jeer', 0.3), ('make buzzing sound', 0.3), ('whirr', 0.3), ('whistle', 0.2), ('sputter', 0.2), ('mock', 0.2), ('shrill', 0.1), ('boo', 0.1), ('deride', 0.0), ('blow', 0.0), ('whiz', 0.0), ('rasp', 0.0), ('hoot', 0.0), ('say', 0.0), ('wheeze', 0.0), ('disapprove', 0.0), ('damn', 0.0), ('sizzle', 0.0), ('catcall', 0.0), ('buzz', 0.0), ('sibilate', 0.0), ('revile', 0.0), ('ridicule', 0.0), ('spit', 0.0), ('sigh', 0.0), ('condemn', 0.0), ('shout down', 0.0), ('whisper', 0.0), ('decry', 0.0), ('siss', 0.0)])

In [None]:
self = env

In [514]:
h0 = self.history[0]

mask_show = ''
solo_mask = h0.text.replace(h0.target,'<mask>',1)
mask_text = ''
for i,(word,score) in enumerate(h0.substitutes[:5]):
    if len(h0.text)*6<2100:
        text = h0.text
    else:
        subtract_len = len(h0.text)-350
        index = h0.text.index(h0.target)
        if i%2==0:
            pre_sub_index = min(0+subtract_len,index-20)
            text = h0.text[pre_sub_index:]
        else:
            post_sub_index = max(len(h0.text)-subtract_len,index+20)
            text = h0.text[:post_sub_index]
    text.replace(h0.target,word,1)
    if len(self.model.tokenizer(mask_text+text+solo_mask)['input_ids'])>512:
        break
    mask_text += text.replace(h0.target,word,1)
    # mask_show += text.replace(h0.target,f"\033[31m{word}\033[0m",1)+'\033[31m<\nnewline>\033[0m'

In [None]:
#

[('even', 0.913),
 ('and', 0.022),
 ('particularly', 0.007),
 ('especially', 0.003),
 ('although', 0.003)]

In [546]:
state,info = env.reset()

In [547]:
env.history

[question(text='In its center, under a spearing, white light, was a golden table draped with a blue velvet cloth, on which lay a gray, plum-sized rock.\n            “A marvel, isn’t it?” she said.', target='marvel', substitutes=[('wonder', 0.9), ('phenomenon', 0.6), ('miracle', 0.5), ('amazement', 0.5), ('awed', 0.2), ('be amazed be', 0.1), ('genius', 0.0), ('goggle', 0.0), ('prodigy', 0.0), ('gaze', 0.0), ('feel surprise', 0.0), ('stare', 0.0), ('stand in awe', 0.0), ('be surprised', 0.0), ('sensation', 0.0), ('gape', 0.0)])]

In [548]:
env.get_option_words_by_llm()

[('wonder', 0.686),
 ('miracle', 0.053),
 ('magic', 0.03),
 ('strange', 0.012),
 ('wonderful', 0.01)]

In [465]:
from environment import Action

##  evaluate

In [531]:
reload(environment)

<module 'environment' from '/mnt/d/BaiduSyncdisk/intelligent_interactive_system/Thesis/RL/environment.py'>

In [80]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [43]:
len(context_id_state_mapping)

12

# Cache

In [81]:
context_id_state_mapping = {}
context_option_mapping = {}
for env in [train_env,test_env]:
    for context_id in tqdm.tqdm(env.OA.context_ids):
        # if (context_id in context_id_state_mapping) and (context_id in context_option_mapping):
        #     continue
        state,info = env.reset(context_id)
        option_words = env.get_option_words_by_llm(context_id,False)
        context_id_state_mapping[context_id] = state
        context_option_mapping[context_id] = option_words
    
import pickle
with open("state.pkl","wb") as f:
    pickle.dump(context_id_state_mapping,f)
with open("option.pkl","wb") as  f:
    pickle.dump(context_option_mapping,f  )

100%|██████████| 762/762 [18:49<00:00,  1.48s/it]
100%|██████████| 370/370 [09:25<00:00,  1.53s/it]


In [65]:
 with open("option.pkl","b") as f:
    my_obj = pickle.load(f)

ValueError: Must have exactly one of create/read/write/append mode and at most one plus

In [15]:
del context_id_state_mapping 

In [14]:
(64*768*370)/(1024*1024)

17.34375

In [10]:
context_id_state_mapping['c:c28336fbaeb6942c1454706a864cdf89c4535313'].shape

torch.Size([768])

In [32]:
from transformers import AutoTokenizer, AutoModelForMaskedLM
import spacy

nlp = spacy.load("en_core_web_sm")
tokenizer = AutoTokenizer.from_pretrained('xlm-roberta-base')


In [35]:
def get_option_words_by_llm(self,context_id):
    # state, info = test_env.reset()
    # h0 =self.history[0]


    def repeat_part(sent,target,substitutes,trunck=False):
        rep_list = []
        substitutes = [w for w,s in substitutes[:5]]
        for sub in [target]+substitutes+['<mask>']:
            start = offset-30 if trunck else 0
            post_start = offset-sent.start_char+len(target)
            post_end = post_start+30 if trunck else  100000000
            repeat_part = f"{sent.text[start:offset-sent.start_char]}{sub}{sent.text[post_start:post_end]}"
            rep_list.append(repeat_part)
        return '.'.join(rep_list)

    h0,_ = self.OA.sample(context_id)
    # h0 = self.history[0]

    offset = h0['offset']
    mask_sentence_list = []
    for sent in nlp(h0['text']).sents:
        print(sent.start_char, offset ,sent.end_char)
        if sent.start_char <= offset <sent.end_char:
            sent_text = repeat_part(sent, h0['target'], h0['substitutes'])
            mask_sentence_list.append(sent_text)
        else:
            mask_sentence_list.append(sent.text)

    mask_text = ''.join(mask_sentence_list)
    token_lens = len(tokenizer(''.join(mask_text))['input_ids'])

    if token_lens>512:
        mask_sentence_list = []
        for sent in nlp(h0['text']).sents:
            if sent.start_char <= offset <sent.end_char:
                sent_text = repeat_part(sent, h0['target'], h0['substitutes'],True)
                # print('-------------')
                mask_sentence_list.append(sent_text)
            else:
                mask_sentence_list.append(sent.text)
        mask_text = ''.join(mask_sentence_list)
        token_lens = len(tokenizer(''.join(mask_text))['input_ids'])                
    # words = [(token['token_str'],round(token['score'],3)) for token in self.mask_model(mask_text)]
    return mask_text


In [33]:
h0 = env.history[0]
h0['text'][h0['offset']:]

'Let’s go.”\n            As we headed down the sidewalk, I said, “What is your name, anyway?”'

In [49]:
tokenizer(get_option_words_by_llm(test_env,context_id))

0 128 95
96 128 445
446 128 499


{'input_ids': [0, 87, 1902, 959, 90698, 450, 87, 13648, 959, 33022, 47, 30698, 4, 1284, 47, 1992, 4745, 35978, 5, 3827, 45188, 70, 5551, 77968, 67, 28302, 23, 6, 5, 7077, 16065, 70, 5551, 77968, 67, 28302, 23, 6, 5, 107, 75161, 70, 5551, 77968, 67, 28302, 23, 6, 5, 43866, 107, 70, 5551, 77968, 67, 28302, 23, 6, 5, 3827, 38931, 70, 5551, 77968, 67, 28302, 23, 6, 5, 987, 19, 16065, 70, 5551, 77968, 67, 28302, 23, 6, 5, 250001, 70, 5551, 77968, 67, 28302, 23, 47009, 642, 23409, 1810, 100, 10, 72399, 186857, 5, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [18]:
env.OA.sample()

({'text': 'It was last February, after the winter break, that we moved in together. Now spring was back, under the concrete, and I could smell it even here.\n            “Isn’t it an amazing night, Rache?',
  'target': 'even',
  'substitutes': [('still', 0.4),
   ('as well as', 0.2),
   ('so much as', 0.1),
   ('much', 0.1),
   ('in spite of', 0.0),
   ('yet', 0.0),
   ('despite', 0.0),
   ('notwithstanding', 0.0),
   ('indeed', 0.0),
   ('actually', 0.0),
   ('disregarding', 0.0),
   ('more', 0.0),
   ('yet all the', 0.0)],
  'offset': 135},
 'c:8449d1484b624c8db76ae3d9c60a000f677a244d')

In [536]:
returns = 0
action_list = []
turns_list = []
reward_list = []
for i in tqdm.tqdm(range(200)):
    context_id = test_env.OA.context_ids[i]
    state, info = test_env.reset(context_id)
    state = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
    episode_returns = 0
    for t in count():
        action = select_action(state,eps_threshold=None)
        action_list.append(action.item())
        observation, reward, terminated, truncated, _ = test_env.step(action.item())
        state = observation.reshape((1,-1))
        reward = torch.tensor([reward], device=device)
        episode_returns += reward
        # returns += reward
        done = terminated or truncated
        
        if done:
            turns_list.append(t)
            reward_list.append(episode_returns)
            # plot_durations()


            turn = 0
            for his in env.history:
                space_num = 0 if turn%2==0 else (135-len(his.text))
                role = "User:" if  turn%2==0 else  "Bot:"
                # print(role)
                # print(his.text)
                # print('----------------------------------')
                turn += 1
            break

    # break

  state = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
100%|█████████████████████████████████████████████████████████████████████████| 200/200 [18:37<00:00,  5.59s/it]


In [538]:
from collections import Counter
import pandas as pd

In [539]:
Counter(action_list)

Counter({2: 919, 1: 16, 3: 21, 0: 13})

In [540]:
import pandas as pd

In [541]:
data_act = Counter(action_list) # = 
name_map = {0:'no action',1:'confirm',2:'options',3:'more info'}
pd.DataFrame([data_act],index=['number of occurance']).rename(name_map,axis=1).T

Unnamed: 0,number of occurance
options,919
confirm,16
more info,21
no action,13


In [542]:
df2 = pd.DataFrame([Counter(turns_list)]).T.reset_index()
df2.columns = ['turns','Number of Occurrence']
df2['turns'] += 1
df2

Unnamed: 0,turns,Number of Occurrence
0,5,188
1,2,4
2,3,2
3,1,3
4,4,3


In [135]:
eps_threshold = EPS_END + (EPS_START - EPS_END) * \
        math.exp(-1. * steps_done / EPS_DECAY)

In [214]:
eps_threshold

0.4036247594865046

In [None]:
self = origin_agent

In [54]:
context_id = random.choice(self.context_ids)
context = self.contexts[context_id]['context']
target = self.contexts[context_id]['targets'][0]
target_text = target['target']
substitutes = [(sub['substitute'],sub['label_score']) for sub in target['substitutes']]
sorted_subs = sorted(substitutes,key=lambda x:x[1],reverse=True)

In [113]:
import numpy as np

In [43]:
return_list

[tensor([-0.1000])]

In [42]:
average_return = []
len_avg = 10
for i in range(len(return_list)-len_avg):
    average_return.append(np.mean([s.item() for s in return_list[i:i + len_avg]]))

In [40]:
np.mean([s.item() for s in return_list[i:i+10]])

NameError: name 'np' is not defined

In [19]:
from transformers import AutoTokenizer, AutoModelForMaskedLM
tokenizer = AutoTokenizer.from_pretrained('xlm-roberta-base')

In [20]:
import spacy

nlp = spacy.load("en_core_web_sm")
tokenizer = AutoTokenizer.from_pretrained('xlm-roberta-base')


In [113]:
def repeat_part(sent,target,substitutes,trunck=False):
    rep_list = []
    substitutes = [w for w,s in substitutes[:5]]
    for sub in [target]+substitutes+['<mask>']:
        start = offset-30 if trunck else 0
        repeat_part = f"{sent.text[start:offset-sent.start_char]}{sub}{sent.text[offset-sent.start_char+len(target):]}"
        rep_list.append(repeat_part)
    return '.'.join(rep_list)

In [94]:
import tqdm

In [98]:
test_origin_agent = OriginAgent(file_path='../data/swords/swords-v1.1_test.json.gz')
test_env = environment.DialougeEnv(test_origin_agent,embedding_model,unmasker)

In [114]:
for con in tqdm.tqdm(env.OA.context_ids):
    state,info = env.reset(con)
    h0 = env.history[0]
    offset = h0['offset']
    mask_sentence_list = []
    for sent in nlp(h0['text']).sents:
        if sent.start_char < offset <sent.end_char:
            sent_text = repeat_part(sent, h0['target'], h0['substitutes'])
            # print('-------------')
            mask_sentence_list.append(sent_text)
        else:
            mask_sentence_list.append(sent.text)

    mask_text = ''.join(mask_sentence_list)
    token_lens = len(tokenizer(''.join(mask_text))['input_ids'])
    token_lens, mask_text
    
    if token_lens>512:
        mask_sentence_list = []
        for sent in nlp(h0['text']).sents:
            if sent.start_char < offset <sent.end_char:
                sent_text = repeat_part(sent, h0['target'], h0['substitutes'],True)
                # print('-------------')
                mask_sentence_list.append(sent_text)
            else:
                mask_sentence_list.append(sent.text)
        mask_text = ''.join(mask_sentence_list)
        token_lens = len(tokenizer(''.join(mask_text))['input_ids'])
        if token_lens>512:
            print(token_lens)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 370/370 [00:34<00:00, 10.59it/s]


In [111]:
for sent in nlp(h0['text']).sents:
    print("-----------")
    print(sent)
    print(sent)
# print(mask_text)

-----------
The FBI cited a vivid conversation with Anissina's mother in
which Tokhtakhounov assured her that even if her daughter "falls,
we will make sure she is No. 1."


   After the Winter Olympics, a French judge, Marie-Reine Le
Gougne, was suspended by the International Skating Union for not
reporting pressure she said was put on her by Didier Gailhaguet,
president of the French Skating Federation, to vote for the Russian
pairs team.


   
-----------
She later recanted and said that Canadian officials had
pressured her.


In [53]:
sent.text[]

'B'

In [28]:
for i,t in enumerate(text[0:19]):
    print(i,t,end=';')

0 ";1 W;2 e;3  ;4 w;5 e;6 r;7 e;8  ;9 a;10 l;11 l;12  ;13 a;14 p;15 p;16 a;17 l;18 l;

In [37]:
sent.start_char

117

In [65]:
sent.as_doc

<function Span.as_doc>

In [35]:
origin_agent

<environment.OriginAgent at 0x7fc5603b8b80>

In [35]:
import spacy

# 加载英文语言模型
nlp = spacy.load("en_core_web_sm")

In [None]:
def truncat(text):
    text_list = []
    
    
    

In [103]:
state,info = env.reset()

In [106]:
h0 = env.history[0]
mask_text_doc = nlp(h0.text)
mask_text = ''
# 遍历每个句子并打印
for sentence in mask_text_doc.sents:
    # print(sentence.text)
    if h0.target in sentence.text:
        words = [h0.tarnlp = spacy.load("en_core_web_sm")get,*[w for w,score in h0.substitutes[:5]],'<mask>']
        sub_sentence_text = ".".join([sentence.text.replace(h0.target,w,1) for w in words])
        mask_text += sub_sentence_text
    else:
        mask_text += sentence.text
if len(tokenizer(mask_text)['input_ids'])>10:
    mask_text = ''
    # 遍历每个句子并打印
    for sentence in mask_text_doc.sents:
        # print(sentence.text)
        if h0.target in sentence.text:
            parts = sentence.text.split(",")
            mask_idx = [i for i,p in enumerate(parts) if (h0.target in p)][0]
            words = [h0.target,*[w for w,score in h0.substitutes[:7]],'<mask>']
            sub_sentence_text = ",".join([parts[mask_idx].replace(h0.target,w,1) for w in words])
            parts[mask_idx] = sub_sentence_text
            mask_text += ' '.join(parts)
        else:
            mask_text += sentence.text

print(h0.text)
print("-------------------------------------")
print(mask_text)
print(f"-----------{h0.target}--------------")
print(h0.substitutes)
print("-------------------------")
print([(token['token_str'],round(token['score'],3)) for token in unmasker(mask_text)])
print("-------------------------")
print([(token['token_str'],round(token['score'],3)) for token in unmasker(solo_mask)])

len(tokenizer(h0.text)['input_ids'])

list>   

Words of wisdom


   Julia Child doesn't have much use for fads and trends -- never
has.
-------------------------------------
list>   

Words of wisdom


   Julia Child doesn't have much use for fads and trends -- never
has.,list>   

Words of the wise


   Julia Child doesn't have much use for fads and trends -- never
has.,list>   

Words of knowledge


   Julia Child doesn't have much use for fads and trends -- never
has.,list>   

Words of advice


   Julia Child doesn't have much use for fads and trends -- never
has.,list>   

Words of astuteness


   Julia Child doesn't have much use for fads and trends -- never
has.,list>   

Words of sageness


   Julia Child doesn't have much use for fads and trends -- never
has.,list>   

Words of enlightenment


   Julia Child doesn't have much use for fads and trends -- never
has.,list>   

Words of expertise


   Julia Child doesn't have much use for fads and trends -- never
has.,list>   

Words of <mask>


   Julia Child doesn't

26

In [107]:
h0 = env.history[0]
mask_text_doc = nlp(h0.text)
mask_text = ''
# 遍历每个句子并打印
for sentence in mask_text_doc.sents:
    # print(sentence.text)
    if h0.target in sentence.text:
        words = [h0.target,*[w for w,score in h0.substitutes[:7]],'<mask>']
        random.shuffle(words)
        sub_sentence_text = ".".join([sentence.text.replace(h0.target,w,1) for w in words])
        mask_text += sub_sentence_text
    else:
        mask_text += sentence.text
if len(tokenizer(mask_text)['input_ids'])>512:
    mask_text = ''
    # 遍历每个句子并打印
    for sentence in mask_text_doc.sents:
        # print(sentence.text)
        if h0.target in sentence.text:
            parts = sentence.text.split(",")
            mask_idx = [i for i,p in enumerate(parts) if (h0.target in p)][0]
            words = [h0.target,*[w for w,score in h0.substitutes[:5]],'<mask>']
            random.shuffle(words)
            sub_sentence_text = ",".join([parts[mask_idx].replace(h0.target,w,1) for w in words])
            parts[mask_idx] = sub_sentence_text
            mask_text += ' '.join(parts)
        else:
            mask_text += sentence.text

print(h0.text)
print("-------------------------------------")
print(mask_text)
print(f"-----------{h0.target}--------------")
print(h0.substitutes)
print("-------------------------")
print([(token['token_str'],round(token['score'],3)) for token in unmasker(mask_text)])
print("-------------------------")
print([(token['token_str'],round(token['score'],3)) for token in unmasker(solo_mask)])

len(tokenizer(h0.text)['input_ids'])

list>   

Words of wisdom


   Julia Child doesn't have much use for fads and trends -- never
has.
-------------------------------------
list>   

Words of sageness


   Julia Child doesn't have much use for fads and trends -- never
has..list>   

Words of wisdom


   Julia Child doesn't have much use for fads and trends -- never
has..list>   

Words of knowledge


   Julia Child doesn't have much use for fads and trends -- never
has..list>   

Words of astuteness


   Julia Child doesn't have much use for fads and trends -- never
has..list>   

Words of <mask>


   Julia Child doesn't have much use for fads and trends -- never
has..list>   

Words of advice


   Julia Child doesn't have much use for fads and trends -- never
has..list>   

Words of the wise


   Julia Child doesn't have much use for fads and trends -- never
has..list>   

Words of expertise


   Julia Child doesn't have much use for fads and trends -- never
has..list>   

Words of enlightenment


   Julia Child doesn't

26

In [97]:
import random
random.shuffle(words)

In [98]:
words

['listless', '<mask>', 'lifeless', 'empty', 'dead', 'flat', 'deathly']

In [45]:
len(tokenizer(mask_text)['input_ids'])>512

True

In [None]:
if there is target in the top masks :
    that means LLM could predict well, the ambiagrous is less
    
    No action

if top mask's score is high ,and in':
    
    

In [260]:
l0s = 0
l1s=0
for i in  range(100):
    state,info = test_env.reset()
    h0 = test_env.history[0]
    l1 = len(tokenizer(h0.text)['input_ids'])
    l0 = len(h0.text)
    l0s+=l0
    l1s+=l1
    print(l0,l1,round(l0/l1,2))

117 31 3.77
276 77 3.58
142 41 3.46
404 106 3.81
258 70 3.69
129 36 3.58
89 20 4.45
358 96 3.73
294 61 4.82
313 81 3.86
426 119 3.58
213 41 5.2
292 65 4.49
152 29 5.24
204 62 3.29
251 59 4.25
425 106 4.01
266 65 4.09
280 61 4.59
321 67 4.79
65 16 4.06
375 84 4.46
546 116 4.71
194 45 4.31
148 30 4.93
183 42 4.36
134 31 4.32
207 48 4.31
510 105 4.86
297 58 5.12
277 64 4.33
242 53 4.57
219 46 4.76
243 59 4.12
369 87 4.24
420 76 5.53
226 60 3.77
296 73 4.05
131 35 3.74
111 26 4.27
192 45 4.27
164 42 3.9
93 28 3.32
446 106 4.21
336 96 3.5
420 76 5.53
245 61 4.02
258 70 3.69
290 64 4.53
154 34 4.53
293 69 4.25
197 52 3.79
224 58 3.86
195 44 4.43
153 35 4.37
112 27 4.15
129 36 3.58
386 93 4.15
226 47 4.81
270 69 3.91
343 77 4.45
422 113 3.73
171 45 3.8
540 134 4.03
335 73 4.59
259 63 4.11
293 69 4.25
103 27 3.81
185 43 4.3
153 35 4.37
325 79 4.11
277 65 4.26
73 20 3.65
433 109 3.97
245 61 4.02
354 74 4.78
129 36 3.58
376 98 3.84
232 58 4.0
595 132 4.51
267 52 5.13
475 96 4.95
154 34 4.53
201 

In [371]:
state,info = test_env.reset()
h0 = test_env.history[0]
solo_mask = h0.text.replace(h0.target,'<mask>',1)
mask_text = ''
for i,(word,score) in enumerate(h0.substitutes[:5]):
    print(len(h0.text)*6)
    if len(h0.text)*6<2100:
        text = h0.text
    else:
        subtract_len = len(h0.text)-350
        index = h0.text.index(h0.target)
        post_sub_index = max(len(h0.text)-subtract_len,index+20)
        if i%2==0:
            pre_sub_index = min(0+subtract_len,index-20)
            text = h0.text[pre_sub_index:]
    if len(tokenizer(mask_text+text+solo_mask)['input_ids'])>512:
        break
    mask_text += text
mask_text  += solo_mask
    

2502
2502
2502
2502
2502


In [373]:
mask_text

"            compromise and capitulation, between symbols and substance. Shortly before noon,\n                    I climbed the sandy path to Dream's End, rehearsing my apology, slowing only to\n                    pluck burrs from between my sandals and my feet. \n                 Malaquez answered the door in blood-red pajamas and a black silk robe.            compromise and capitulation, between symbols and substance. Shortly before noon,\n                    I climbed the sandy path to Dream's End, rehearsing my apology, slowing only to\n                    pluck burrs from between my sandals and my feet. \n                 Malaquez answered the door in blood-red pajamas and a black silk robe.            compromise and capitulation, between symbols and substance. Shortly before noon,\n                    I climbed the sandy path to Dream's End, rehearsing my apology, slowing only to\n                    pluck burrs from between my sandals and my feet. \n                 Malaquez a

In [368]:
len(mask_text)

2084

In [266]:
import random

In [280]:
random.randint(0,1)

0

In [281]:
text = 'reudToy, a pillow bearing the likeness of Sigmund Freud, is marketed as a $24.95 tool for do-it-yourself analysis.,Also spurring the move '

In [359]:
m_text = (text*12)[:-23]+ '<mask>'

In [360]:
token = tokenizer(m_text)

In [361]:
len(token['input_ids'])

512

In [362]:
unmasker(m_text)

[{'score': 0.8441550135612488,
  'token': 2,
  'token_str': '</s>',
  'sequence': 'reudToy, a pillow bearing the likeness of Sigmund Freud, is marketed as a $24.95 tool for do-it-yourself analysis.,Also spurring the move reudToy, a pillow bearing the likeness of Sigmund Freud, is marketed as a $24.95 tool for do-it-yourself analysis.,Also spurring the move reudToy, a pillow bearing the likeness of Sigmund Freud, is marketed as a $24.95 tool for do-it-yourself analysis.,Also spurring the move reudToy, a pillow bearing the likeness of Sigmund Freud, is marketed as a $24.95 tool for do-it-yourself analysis.,Also spurring the move reudToy, a pillow bearing the likeness of Sigmund Freud, is marketed as a $24.95 tool for do-it-yourself analysis.,Also spurring the move reudToy, a pillow bearing the likeness of Sigmund Freud, is marketed as a $24.95 tool for do-it-yourself analysis.,Also spurring the move reudToy, a pillow bearing the likeness of Sigmund Freud, is marketed as a $24.95 tool for

In [264]:
512*4.1

2099.2

In [265]:
2100/6

350.0

In [261]:
(l0s/l1s)

4.199420569773056

In [251]:
len(mask_text)

1559

In [250]:
h0.text.index(h0.target),len(h0.text)

(157, 387)

In [247]:
512 - len(h0.text) 

125

In [154]:
from functools import lru_cache

In [85]:
import transformers

In [None]:
transformers.AutoModelForMaskedLM(

In [7]:
from transformers import AutoModelForMaskedLM
from transformers import AutoTokenizer, AutoModelForMaskedLM
tokenizer = AutoTokenizer.from_pretrained('xlm-roberta-base')

In [88]:
model = AutoModelForMaskedLM.from_pretrained("xlm-roberta-base")

In [93]:
# prepare input
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text, return_tensors='pt')

# forward pass
output = model(**encoded_input)

In [101]:
output

MaskedLMOutput(loss=None, logits=tensor([[[ 6.4861e+01,  1.6882e-02,  3.7656e+01,  ...,  2.1584e+01,
           1.4380e+01,  1.8790e+01],
         [ 2.7493e+01, -1.4091e+00,  6.4847e+01,  ...,  4.0234e+01,
           1.6296e+01,  3.0925e+01],
         [ 1.9604e+01, -1.2597e+00,  4.8981e+01,  ...,  3.5830e+01,
           1.7145e+01,  2.7173e+01],
         ...,
         [ 2.2920e+01, -1.4657e+00,  5.1211e+01,  ...,  3.8495e+01,
           1.6508e+01,  2.7687e+01],
         [ 2.8598e+01, -1.2868e+00,  6.7706e+01,  ...,  4.4857e+01,
           1.8004e+01,  3.5004e+01],
         [ 4.4955e+01, -2.1554e-01,  4.9643e+01,  ...,  2.8253e+01,
           1.6841e+01,  2.3610e+01]]], grad_fn=<ViewBackward0>), hidden_states=None, attentions=None)

In [104]:
model.__class__

transformers.models.xlm_roberta.modeling_xlm_roberta.XLMRobertaForMaskedLM

In [None]:
with torch.no_grad():
    outputs = model(input_ids)
    predictions = outputs.logits


In [105]:
from transformers import FillMaskPipeline

2023-05-17 16:07:54.349083: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-05-17 16:07:54.349119: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [108]:
mask_pipeline = FillMaskPipeline(model=env.model,tokenizer=tokenizer)

AttributeError: 'EmbeddingModel' object has no attribute 'config'

In [107]:
mask_pipeline("this is a <mask> car")

[{'score': 0.07687385380268097,
  'token': 34923,
  'token_str': 'beautiful',
  'sequence': 'this is a beautiful car'},
 {'score': 0.04615773260593414,
  'token': 29681,
  'token_str': 'sports',
  'sequence': 'this is a sports car'},
 {'score': 0.03222648799419403,
  'token': 54704,
  'token_str': 'classic',
  'sequence': 'this is a classic car'},
 {'score': 0.031460631638765335,
  'token': 6782,
  'token_str': 'great',
  'sequence': 'this is a great car'},
 {'score': 0.030774081125855446,
  'token': 26267,
  'token_str': 'nice',
  'sequence': 'this is a nice car'}]

In [118]:
embedding_model.model

XLMRobertaModel(
  (embeddings): XLMRobertaEmbeddings(
    (word_embeddings): Embedding(250002, 768, padding_idx=1)
    (position_embeddings): Embedding(514, 768, padding_idx=1)
    (token_type_embeddings): Embedding(1, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): XLMRobertaEncoder(
    (layer): ModuleList(
      (0): XLMRobertaLayer(
        (attention): XLMRobertaAttention(
          (self): XLMRobertaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): XLMRobertaSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
     

In [102]:
model

XLMRobertaForMaskedLM(
  (roberta): XLMRobertaModel(
    (embeddings): XLMRobertaEmbeddings(
      (word_embeddings): Embedding(250002, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): XLMRobertaEncoder(
      (layer): ModuleList(
        (0): XLMRobertaLayer(
          (attention): XLMRobertaAttention(
            (self): XLMRobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): XLMRobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
          