In [None]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
from datasets import load_dataset
import torch
from torch.utils.data import DataLoader
from torch import optim
import tqdm
import numpy as np
from instruct_goose import Agent, RewardModel, RLHFTrainer, RLHFConfig, create_reference_model

In [None]:
# Hub repository that provides both the fine-tuned token-classification weights and the tokenizer.
MODEL_CHECKPOINT = "hemangjoshi37a/autotrain-stocks-ner-2000-sample-test-1676759313"

# use_auth_token=True asks the hub client to authenticate with the locally stored credentials.
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT, use_auth_token=True)
model_base = AutoModelForTokenClassification.from_pretrained(MODEL_CHECKPOINT, use_auth_token=True)

In [None]:
import pandas as pd
from datasets import Dataset

# Read only the two columns used downstream and discard rows where either one is missing.
df = pd.read_csv(
    "./dataset_for_huggingface_autoTrain_label_vlassification_NEW_LOGIC.csv",
    usecols=["text", "label"],
).dropna()

# Wrap the cleaned frame in a datasets.Dataset so it can be mapped and fed to a DataLoader.
dataset = Dataset.from_pandas(df)

# Label names listed in id order: a name's position in the list is its integer class id.
label_map = {
    name: index
    for index, name in enumerate([
        'NANA',
        'btst',
        'delivery',
        'enter',
        'entry_momentum',
        'exit',
        'exit2',
        'exit3',
        'intraday',
        'sl',
        'symbol',
        'touched',
    ])
}

def _parse_label_names(raw_label):
    """Split a stringified label list such as "['symbol', 'sl']" into bare label names."""
    cleaned = raw_label
    for unwanted in (' ', "'", '[', ']'):
        cleaned = cleaned.replace(unwanted, '')
    # "[]" (or an empty cell) collapses to '', which split(',') turns into ['']; drop such
    # empty fragments instead of letting them reach the label lookup.
    return [name for name in cleaned.split(',') if name]


def preprocess_data(data):
    """Tokenize one dataset row and convert its label names to integer ids.

    Args:
        data: mapping with a "text" entry (passed to the notebook-level ``tokenizer``) and a
            "label" entry holding a stringified list of label names.

    Returns:
        dict with "input_ids", "attention_mask" and "token_type_ids" taken from the tokenizer
        output, plus "labels": the label ids in the order they appear in ``data["label"]``.

    Raises:
        KeyError: if a label name is not a key of the notebook-level ``label_map``.

    The input row is not modified, so calling the function twice on the same row is safe.

    NOTE(review): "labels" has one entry per name in the label column; it is neither padded
    nor aligned to the max_length-padded token sequence — confirm this is what the consumer
    expects.
    NOTE(review): the training loop strips brackets/quotes from ``text`` before tokenizing,
    which suggests the column holds a stringified list; here the raw string is tokenized
    as-is — confirm.
    """
    # Offsets are not requested: they were never part of the returned features.
    inputs = tokenizer(data["text"], padding="max_length", truncation=True)

    label_names = _parse_label_names(data["label"])
    unknown = [name for name in label_names if name not in label_map]
    if unknown:
        raise KeyError(f"Unknown label name(s) {unknown} in label column value {data['label']!r}")

    # Map the labels to integers using the label_map dictionary
    labels = [label_map[name] for name in label_names]

    return {
        "input_ids": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"],
        "token_type_ids": inputs["token_type_ids"],
        "labels": labels,
    }


# Apply the preprocess_data function to every row of the dataset.
# NOTE(review): this rebinds `dataset`, so re-running the cell maps an already-mapped
# dataset again — consider a separate name for the mapped result.
dataset = dataset.map(preprocess_data)

# Define the DataLoader: shuffled iteration over the mapped dataset. No batch_size is passed,
# so torch's default applies (presumably 1, which matches the `batch['text'][0]` indexing in
# the training loop — confirm).
train_dataloader = DataLoader(dataset, shuffle=True)

Map:   0%|          | 0/1741 [00:00<?, ? examples/s]

In [None]:
num_labels = len(label_map)

# For the token-classification model loaded above, the head is the `classifier` layer itself.
# Assigning a layer to `classifier.out_proj` would only attach an unused sub-module (plus extra
# state_dict keys) and never change the logits. Replace the head only when its width does not
# match the label set, so the fine-tuned weights are kept whenever they already fit.
# NOTE(review): assumes `classifier` is a single Linear layer, as in BERT token classification
# (the load log below names BertModel) — confirm for other architectures.
if model_base.config.num_labels != num_labels:
    model_base.classifier = torch.nn.Linear(model_base.config.hidden_size, num_labels)
    model_base.config.num_labels = num_labels
    model_base.num_labels = num_labels

# Define the reward model (same checkpoint as the policy).
# NOTE(review): nothing in this notebook trains it, and the head installed below is freshly
# initialised, so its scores are not meaningful until it is fine-tuned on task-specific data.
reward_model = RewardModel("hemangjoshi37a/autotrain-stocks-ner-2000-sample-test-1676759313")

# Define the agent (the model the optimizer updates) and the reference model handed to the trainer
model = Agent(model_base)
ref_model = create_reference_model(model)

# Define the trainer configuration
config = RLHFConfig()

# Width of the encoder's hidden states, read from the checkpoint config. Probing RLHFConfig for
# n_embd / hidden_size could leave this name undefined; the reward model is built from the same
# checkpoint as model_base, so this width matches its hidden states.
hidden_size = model_base.config.hidden_size

# custom head: one sigmoid-squashed score per hidden state
reward_model.reward_head = torch.nn.Sequential(
    torch.nn.Linear(hidden_size, 1),
    torch.nn.Sigmoid()
)

trainer = RLHFTrainer(model, ref_model, config)
optimizer = optim.SGD(model.parameters(), lr=1e-3)

# Define the generation function
def generate_labels(input_sequence):
    """Predict one class id per token of ``input_sequence`` with ``model_base``.

    Args:
        input_sequence: text passed to the notebook-level ``tokenizer``.

    Returns:
        Integer tensor holding the argmax over the last axis of the logits
        (assumes logits are (batch, tokens, classes) — TODO confirm).
    """
    # Truncate like the query tokenization in the training loop, so over-long inputs are cut
    # to the tokenizer's limit instead of reaching the model at full length.
    inputs = tokenizer(input_sequence, truncation=True, return_tensors="pt")
    # Only the argmax is used, which carries no gradient; skipping autograd bookkeeping for
    # this forward pass saves memory on every training step.
    with torch.no_grad():
        output = model_base(**inputs)
    return torch.argmax(output.logits, dim=2)

# Training settings
N_EPOCH = 100        # number of passes the training loop makes over the dataloader
max_new_labels = 20  # tail length used when the response labels are sliced

# Keyword bundle carrying the label budget (not referenced by the visible cells).
generation_kwargs = dict(max_new_labels=max_new_labels)

Some weights of the model checkpoint at hemangjoshi37a/autotrain-stocks-ner-2000-sample-test-1676759313 were not used when initializing BertModel: ['classifier.weight', 'classifier.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at hemangjoshi37a/autotrain-stocks-ner-2000-sample-test-1676759313 and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
class ClassifiedMsgClass:
    """Classify the words of one message and expose the predicted fields as attributes.

    Construction tokenizes ``msg`` with the notebook-level ``tokenizer``, runs ``model_base``
    on it, merges word pieces back into words and stores:

    * ``predictions_df``   - one row per word: ``sentence``, ``sentence_class`` and
      ``sentence_class_name``;
    * ``predictions_dict`` - class name -> word (a later word of the same class overwrites
      an earlier one).

    Every entry of ``predictions_dict`` is also set as an instance attribute, replacing the
    defaults assigned in ``__init__``.
    """

    # class id -> class name
    # NOTE(review): id 4 is 'momentum' here but 'entry_momentum' in the notebook's label_map —
    # confirm which name the checkpoint was trained with.
    classification_number_to_name_dict = {
        0: 'NANA',
        1: 'btst',
        2: 'delivery',
        3: 'enter',
        4: 'momentum',
        5: 'exit',
        6: 'exit2',
        7: 'exit3',
        8: 'intraday',
        9: 'sl',
        10: 'symbol',
        11: 'touched',
    }
    # class name -> class id, derived from the table above so the two cannot drift apart
    classification_name_to_number_dict = {
        name: number for number, name in classification_number_to_name_dict.items()
    }

    def __init__(self, msg):
        """Store ``msg``, set every field to its default, then classify the message."""
        self.msg = msg
        # Defaults that remain in place for classes not predicted in this message.
        self.btst = ''
        self.delivery = ''
        self.enter = 0
        self.momentum = 0
        self.exit = 0
        self.exit2 = 0
        self.exit3 = 0
        self.intraday = ''
        self.sl = 0
        self.symbol = ''
        self.touched = 0
        self.get_class_map_from_message_NEW(self.msg)

    def dict_to_self_attr(self, *initial_data, **kwargs):
        """Set every key/value of the given dictionaries and keyword arguments as attributes."""
        for dictionary in initial_data:
            for key in dictionary:
                setattr(self, key, dictionary[key])
        for key in kwargs:
            setattr(self, key, kwargs[key])

    def get_class_map_from_message_NEW(self, input_message: str) -> dict:
        """Predict a class for each word of ``input_message`` and record the results.

        Tokens are folded into words as follows:

        * '[CLS]' and '[SEP]' are skipped;
        * a token containing '#' is treated as a word-piece continuation and appended (with
          every '#' removed) to the current word, keeping the current word's class;
        * a '.' token, or the token right after a '.', is appended to the current word when
          its class equals the current word's class (keeps values such as "12.5" together);
        * any other token closes the current word and starts a new one with its own class.

        A word is recorded only once at least one token has been folded into it, so no empty
        placeholder row precedes the first real word.

        Returns:
            ``self.predictions_dict`` (class name -> word). Words that parse as numbers are
            stored as numbers because the frame goes through ``pd.to_numeric``.
        """
        encoded = tokenizer(input_message, return_tensors='pt')
        # Inference only: skip autograd bookkeeping for the forward pass.
        with torch.no_grad():
            model_output = model_base(**encoded)

        token_logits = model_output.logits[0]
        decoded_tokens = tokenizer.batch_decode(encoded['input_ids'][0])

        words = []
        word_classes = []
        current_word = ''
        current_class = 0
        previous_token = ''
        word_started = False  # True once a token has been folded into current_word

        for position, token in enumerate(decoded_tokens):
            if token in ('[CLS]', '[SEP]'):
                continue
            token_class = int(np.argmax(token_logits[position].tolist()))

            if '#' in token:
                current_word += token.replace('#', '')
                word_started = True
            elif token_class == current_class and (token == '.' or previous_token == '.'):
                current_word += token
                word_started = True
            else:
                if word_started:
                    words.append(current_word)
                    word_classes.append(current_class)
                current_word = token
                current_class = token_class
                word_started = True
            previous_token = token

        # Record the word still being assembled when the tokens run out.
        if word_started:
            words.append(current_word)
            word_classes.append(current_class)

        class_names = [self.classification_number_to_name_dict[number] for number in word_classes]
        self.predictions_df = pd.DataFrame({'sentence': words,
                                            'sentence_class': word_classes,
                                            'sentence_class_name': class_names,
                                            })
        # Turn numeric-looking cells into numbers; cells that fail to parse become NaN and are
        # then restored from the untouched frame.
        self.predictions_df = self.predictions_df.apply(pd.to_numeric, errors='coerce').fillna(self.predictions_df)
        # NOTE(review): class names are never '' here, so this filter keeps every row — it may
        # have been meant to drop 'NANA'; confirm.
        labelled_df = self.predictions_df.loc[self.predictions_df.sentence_class_name != '']
        self.predictions_dict = dict(zip(labelled_df.sentence_class_name, labelled_df.sentence))
        self.dict_to_self_attr(self.predictions_dict)
        return self.predictions_dict

# class_number_to_name_dict
    

In [None]:
for epoch in tqdm.tqdm(range(N_EPOCH)):
    for batch in train_dataloader:
        # The text column is treated as a stringified list (presumably "['buy', 'abc', ...]" —
        # confirm): drop quotes and brackets, split on commas and re-join with spaces to get
        # the plain sentence.
        raw_text = batch['text'][0]
        for unwanted in ("'", '[', ']'):
            raw_text = raw_text.replace(unwanted, '')
        this_txt = " ".join(raw_text.split(','))

        inputs = tokenizer(this_txt, padding=True, truncation=True, return_tensors="pt")
        # Label the same cleaned sentence that forms the query. Labelling the raw column value
        # instead would tokenize its quotes, brackets and commas, so the response would not
        # correspond to the query tokens.
        response_labels = generate_labels(this_txt)
        response_attention_mask = torch.ones_like(response_labels)

        # evaluate from the reward model (scoring only, so no gradients are needed)
        with torch.no_grad():
            text_input_ids = torch.stack(
                [torch.concat([q, r]) for q, r in zip(inputs["input_ids"], response_labels)],
                dim=0,
            )
            rewards = reward_model(text_input_ids)

        # calculate PPO loss
        loss = trainer.compute_loss(
            query_ids=inputs["input_ids"],
            query_attention_mask=inputs["attention_mask"],
            response_ids=response_labels,
            response_attention_mask=response_attention_mask,
            rewards=rewards
        )
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(f"loss={loss}")

  0%|          | 0/100 [00:00<?, ?it/s]

loss=0.29343298077583313
loss=0.05562248080968857
loss=-0.15725748240947723
loss=0.16791892051696777
loss=-0.1843014806509018


  0%|          | 0/100 [00:53<?, ?it/s]

loss=-0.3969211280345917





KeyboardInterrupt: 

In [None]:
# Evaluate the model on the validation set
validation_dataset = load_dataset("my_dataset", split="validation")
validation_dataloader = DataLoader(validation_dataset)

total_loss = 0
# Evaluation never calls backward(), so run the whole pass without building autograd graphs.
with torch.no_grad():
    for batch in validation_dataloader:
        # Same cleaning as the training loop: drop quotes and brackets, split on commas and
        # re-join with spaces.
        raw_text = batch['text'][0]
        for unwanted in ("'", '[', ']'):
            raw_text = raw_text.replace(unwanted, '')
        this_txt = " ".join(raw_text.split(','))

        inputs = tokenizer(this_txt, padding=True, truncation=True, return_tensors="pt")

        # Label the same cleaned sentence that forms the query, not the raw column value.
        response_labels = generate_labels(this_txt)
        # NOTE(review): the training loop has this slice commented out, so the validation loss
        # is computed on at most the last `max_new_labels` labels while training uses all of
        # them — confirm which is intended.
        response_labels = response_labels[:, -max_new_labels:]
        response_attention_mask = torch.ones_like(response_labels)

        text_input_ids = torch.stack(
            [torch.concat([q, r]) for q, r in zip(inputs["input_ids"], response_labels)],
            dim=0,
        )
        rewards = reward_model(text_input_ids)
        loss = trainer.compute_loss(
            query_ids=inputs["input_ids"],
            query_attention_mask=inputs["attention_mask"],
            response_ids=response_labels,
            response_attention_mask=response_attention_mask,
            rewards=rewards
        )
        total_loss += loss.item()

# An empty validation split reports NaN instead of raising ZeroDivisionError.
num_batches = len(validation_dataloader)
avg_loss = total_loss / num_batches if num_batches else float("nan")
print(f"Validation loss: {avg_loss}")

KeyboardInterrupt: 

In [None]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
from datasets import load_dataset
import torch
from torch.utils.data import DataLoader
from instruct import Agent, RewardModel, InstructGPTTrainer, InstructGPTConfig, create_reference_model

# Load the tokenizer and model
# NOTE(review): this cell rebinds `tokenizer`, `model` and `dataset`, which earlier cells
# already define for the stocks-NER checkpoint; re-running earlier cells afterwards would pick
# up these replacements.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = AutoModelForTokenClassification.from_pretrained("distilbert-base-uncased")

# Load the dataset
# "my_dataset" looks like a placeholder name and no split is selected here — TODO confirm the
# intended dataset and split.
dataset = load_dataset("my_dataset")

# Define the reward function
def reward_function(generated_sequence):
    """Placeholder reward: return 1.0 for any sequence; the argument is not inspected."""
    constant_reward = 1.0
    return constant_reward

# Define the agent and reference model
# `agent` wraps the model loaded in this cell; the reference model is created from the agent.
agent = Agent(model)
ref_model = create_reference_model(agent)

# Define the trainer
# NOTE(review): `ref_model`, `config` and `trainer` replace the objects of the same name that
# earlier cells created for the RLHFTrainer-based loop.
config = InstructGPTConfig()
trainer = InstructGPTTrainer(agent, ref_model, config)

# Train the model
# The DataLoader is built with default settings over whatever load_dataset returned; the
# reward function is passed to the trainer uncalled.
# NOTE(review): this also rebinds `train_dataloader` from the earlier training cell.
train_dataloader = DataLoader(dataset)
trainer.train(train_dataloader, reward_function)


TypeError: code expected at least 16 arguments, got 15