In [4]:
import math
import statistics
from typing import Optional
from tqdm import tqdm
import torch
from peft import PeftModel,PeftConfig
from peft import get_peft_model, LoraConfig, TaskType
from prm_interface import PRM, StepScore
from torch.types import Device
from transformers import (  # type: ignore  # type: ignore
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)

import json

def read_json_file(file_path):
    """Load and return the parsed contents of a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The deserialized JSON value produced by ``json.load``.
    """
    # Decode explicitly as UTF-8 so non-ASCII text in the data does not depend
    # on the platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)


class math_psa_prm(PRM):
    """Process reward model: ``Qwen/Qwen2.5-Math-7B-Instruct`` + ``openreasoner/Math-psa`` adapter.

    A solution string is scored by reading, at every position whose input token
    is the step tag, the softmax over the logits of the good ('+') and bad ('-')
    tokens; the probability of the good token is that step's score.
    """

    def __init__(
        self,
        aggregation: str = "full",#the way how prm step scores will be aggregated in a solution
        quantization_config: Optional[BitsAndBytesConfig] = None,
        device: Optional[Device] = None,
    ) -> None:
        """Load the tokenizer, the base model and the PEFT adapter.

        Args:
            aggregation: How per-step scores are reduced for one solution:
                "min", "max", "mean", "prod", "last", or "full" (no reduction).
            quantization_config: Optional bitsandbytes config forwarded to
                ``from_pretrained``. When given, the model is not moved with
                ``.to(device)`` afterwards.
            device: Target device; defaults to "cuda" when available, else "cpu".
        """
        self.device = (
            device if device else ("cuda" if torch.cuda.is_available() else "cpu")
        )
        self.aggregation = aggregation

        self.good_token = '+'
        self.bad_token = '-'
        self.step_tag = '\n\n\n\n\n' #ки
        self.step_tag2 = '\n\n'

        self.model_path = "Qwen/Qwen2.5-Math-7B-Instruct"
        adapter_path = "openreasoner/Math-psa"

        self.tokenizer = AutoTokenizer.from_pretrained(self.model_path,add_eos_token=False)
        # Id 0 is used as the pad id so that padding differs from the EOS token.
        # NOTE(review): the original note calls id 0 "unk" -- confirm what id 0
        # maps to in this tokenizer.
        self.tokenizer.pad_token_id = 0
        self.tokenizer.padding_side = "left"  # Allow batched inference

        # Ids of the good/bad tokens, in that order ([488, 481] per the original note).
        self.candidate_tokens = self.tokenizer.encode(f" {self.good_token} {self.bad_token}")
        # Id of the step delimiter token (76325 per the original note).
        self.step_tag_id = self.tokenizer.encode(f" {self.step_tag}")[-1]

        base_model = AutoModelForCausalLM.from_pretrained(
            self.model_path,
            torch_dtype=torch.bfloat16,
            quantization_config=quantization_config,
        )
        # Attach the adapter weights. Loading only the PeftConfig (as before)
        # leaves the base model unchanged, so scores would not come from the PRM.
        self.model = PeftModel.from_pretrained(base_model, adapter_path)
        self.model.eval()

        # Quantized models are placed by the loader; only move the model when
        # no quantization config was supplied.
        if not quantization_config:
            self.model.to(self.device)

    def __call_single(self, single_beam: str) -> float | list[float]:
        """Score one solution string and aggregate its per-step scores.

        Args:
            single_beam: Full solution text containing one step tag per step.

        Returns:
            The list of per-step good-token probabilities when aggregation is
            "full", otherwise a single float reduced from that list.

        Raises:
            NotImplementedError: If ``self.aggregation`` is not a supported name.
            ValueError, IndexError: From "min"/"max"/"mean"/"last" when the
                input contains no step tag (empty score list).
        """
        input_id = torch.tensor([self.tokenizer.encode(single_beam)]).to(self.device)

        with torch.no_grad():
            # Keep only the logits of the two candidate tokens at every position.
            logits = self.model(input_id).logits[:,:,self.candidate_tokens]
            # Column 0 is the good token, so this is P(good) vs. bad.
            scores = logits.softmax(dim=-1)[:,:,0]
            # Select the positions whose input token is the step tag.
            step_scores = scores[input_id == self.step_tag_id]
            step_probs  = step_scores.tolist()

        if self.aggregation == "min":
            return min(step_probs)
        elif self.aggregation == "max":
            return max(step_probs)
        elif self.aggregation == "mean":
            return statistics.mean(step_probs)
        elif self.aggregation == "prod":
            return math.prod(step_probs)
        elif self.aggregation == "last":
            return step_probs[-1]
        elif self.aggregation == "full":
            return step_probs
        else:
            raise NotImplementedError

    def __call__(self, steps: list[str]) -> list[StepScore]:
        """
        Args:
            steps (list[str]): A list of reasoning solutions.

        Returns:
            list[StepScore]: One ``StepScore`` per input solution, built with
            ``step`` set to the solution text and ``score`` set to its
            aggregated score (a list of step scores when aggregation is "full").
        """

        result = []

        for beam in steps:#each beam is a cot solution, each step_score is a list of prm step scores for this solution(if aggregation methods is full)
            step_score = self.__call_single(beam)
            result.append(StepScore(step=beam, score=step_score))

        return result
    



In [5]:

# Build the Math-psa reward model; "full" keeps every per-step score.
prm = math_psa_prm(aggregation="full")

Downloading shards: 100%|██████████| 4/4 [24:24<00:00, 366.12s/it]
Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00,  7.63it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 26.00 MiB. GPU 0 has a total capacity of 44.31 GiB of which 15.69 MiB is free. Including non-PyTorch memory, this process has 1.88 GiB memory in use. Process 18716 has 40.51 GiB memory in use. Process 20646 has 1.88 GiB memory in use. Of the allocated memory 1.45 GiB is allocated by PyTorch, and 17.98 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
json_file_path = ""###mixedomain format json file
data = read_json_file(json_file_path)

# Re-format every chain of thought into the layout the PRM reads
# ("<question> \nStep 1: ... <tag>\nStep 2: ... <tag>") and store its scores.
for each_data in tqdm(data):
    for cot in each_data["chain_of_thoughts"]:
        # Drop any step tag already present in the raw text so that only the
        # tags appended below mark step boundaries.
        cleaned_steps = (raw_step.replace(prm.step_tag, "") for raw_step in cot["steps"])
        steps = [
            f"\nStep {number}: {text} {prm.step_tag}"
            for number, text in enumerate(cleaned_steps, start=1)
        ]
        question = each_data["question"].replace(prm.step_tag, "")
        steps_all = f"{question} " + "".join(steps)
        rewards = prm([steps_all])
        # A single solution was submitted, so its score is the first result.
        cot["prm_reward"] = rewards[0].score
    



In [None]:
import math
import statistics
from typing import Optional
from tqdm import tqdm
import torch
from prm_interface import PRM, StepScore
from torch.types import Device
from transformers import (  # type: ignore  # type: ignore
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)

import json

def read_json_file(file_path):
    """Load and return the parsed contents of a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The deserialized JSON value produced by ``json.load``.
    """
    # Decode explicitly as UTF-8 so non-ASCII text in the data does not depend
    # on the platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)


class Mistral7bPRM(PRM):
    """Process reward model backed by ``peiyi9979/math-shepherd-mistral-7b-prm``.

    A solution string is scored by reading, at every position whose input token
    is the step tag, the softmax over the logits of the good ('+') and bad ('-')
    tokens; the probability of the good token is that step's score.
    """

    def __init__(
        self,
        aggregation: str = "full",#the way how prm step scores will be aggregated in a solution
        quantization_config: Optional[BitsAndBytesConfig] = None,
        device: Optional[Device] = None,
    ) -> None:
        """Load the tokenizer and model and resolve the special token ids.

        Args:
            aggregation: How per-step scores are reduced for one solution:
                "min", "max", "mean", "prod", "last", or "full" (no reduction).
            quantization_config: Optional bitsandbytes config forwarded to
                ``from_pretrained``. When given, the model is not moved with
                ``.to(device)`` afterwards.
            device: Target device; defaults to "cuda" when available, else "cpu".
        """
        self.device = (
            device if device else ("cuda" if torch.cuda.is_available() else "cpu")
        )
        self.good_token = '+' #token in the vocabulary set that indicates the probability of good step 
        self.bad_token = '-' #token in the vocabulary set that indicates the probability of bad step 
        self.step_tag = 'ки'# step deliminator to locate the logits to get the prm score for each step

        self.tokenizer = AutoTokenizer.from_pretrained('peiyi9979/math-shepherd-mistral-7b-prm')
        # The former `print(tokenizer)` referenced an undefined name and raised
        # NameError on every construction; it has been removed.
        print(type(self.tokenizer))

        # The first encoded id is dropped ([1:]), leaving the good/bad token
        # ids in that order ([648, 387] per the original note).
        self.candidate_tokens = self.tokenizer.encode(f"{self.good_token} {self.bad_token}")[1:]
        # Id of the step delimiter token (12902 per the original note).
        self.step_tag_id = self.tokenizer.encode(f"{self.step_tag}")[-1]
        self.model = AutoModelForCausalLM.from_pretrained('peiyi9979/math-shepherd-mistral-7b-prm',
                                                       quantization_config=quantization_config).eval()
        # Only move the model when no quantization config was supplied.
        if not quantization_config:
            self.model.to(self.device)

        self.aggregation = aggregation

    def __call_single(self, single_beam: str) -> float | list[float]:
        """Score one solution string and aggregate its per-step scores.

        Args:
            single_beam: Full solution text containing one step tag per step.

        Returns:
            The list of per-step good-token probabilities when aggregation is
            "full", otherwise a single float reduced from that list.

        Raises:
            NotImplementedError: If ``self.aggregation`` is not a supported name.
            ValueError, IndexError: From "min"/"max"/"mean"/"last" when the
                input contains no step tag (empty score list).
        """
        input_for_prm = single_beam
        input_id = torch.tensor([self.tokenizer.encode(input_for_prm)]).to(self.device)

        with torch.no_grad():
            # Keep only the logits of the two candidate tokens at every position.
            logits = self.model(input_id).logits[:,:,self.candidate_tokens]
            scores = logits.softmax(dim=-1)[:,:,0]#for the good token
            # Select the positions whose input token is the step tag.
            step_scores = scores[input_id == self.step_tag_id]
       
        step_probs  = step_scores.tolist()

        if self.aggregation == "min":
            return min(step_probs)
        elif self.aggregation == "max":
            return max(step_probs)
        elif self.aggregation == "mean":
            return statistics.mean(step_probs)
        elif self.aggregation == "prod":
            return math.prod(step_probs)
        elif self.aggregation == "last":
            return step_probs[-1]
        elif self.aggregation == "full":
            return step_probs
        else:
            raise NotImplementedError

    def __call__(self, steps: list[str]) -> list[StepScore]:
        """
        Args:
            steps (list[str]): A list of reasoning solutions.

        Returns:
            list[StepScore]: One ``StepScore`` per input solution, built with
            ``step`` set to the solution text and ``score`` set to its
            aggregated score (a list of step scores when aggregation is "full").
        """

        result = []

        for beam in steps:#each beam is a cot solution, each step_score is a list of prm step scores for this solution(if aggregation methods is full)
            step_score = self.__call_single(beam)
            result.append(StepScore(step=beam, score=step_score))

        return result
    
# Build the Math-Shepherd reward model; "full" keeps every per-step score.
prm = Mistral7bPRM(aggregation="full")
    



In [None]:
import math
import statistics
from typing import Optional
from tqdm import tqdm
import torch
from prm_interface import PRM, StepScore
from torch.types import Device
from transformers import (  # type: ignore  # type: ignore
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)

import json

def read_json_file(file_path):
    """Load and return the parsed contents of a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The deserialized JSON value produced by ``json.load``.
    """
    # Decode explicitly as UTF-8 so non-ASCII text in the data does not depend
    # on the platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)


class Mistral7bPRM(PRM):
    """Step-level reward scorer built on ``peiyi9979/math-shepherd-mistral-7b-prm``.

    Each step's score is the probability of the good token ('+') against the
    bad token ('-'), read at the positions where the input holds the step tag.
    """

    def __init__(
        self,
        aggregation: str = "full",  # how the per-step scores of one solution are combined
        quantization_config: Optional[BitsAndBytesConfig] = None,
        device: Optional[Device] = None,
    ) -> None:
        """Set up tokenizer, special token ids and the model.

        Args:
            aggregation: One of "min", "max", "mean", "prod", "last", "full".
            quantization_config: Optional config handed to ``from_pretrained``;
                when it is given the model is not moved to ``device``.
            device: Device to run on; falls back to "cuda" if available, else "cpu".
        """
        # Resolve the device: an explicit choice wins, otherwise prefer CUDA.
        if device:
            self.device = device
        elif torch.cuda.is_available():
            self.device = "cuda"
        else:
            self.device = "cpu"

        self.good_token = '+'  # vocabulary token whose probability marks a good step
        self.bad_token = '-'  # vocabulary token whose probability marks a bad step
        self.step_tag = 'ки'  # step delimiter; scores are read where this token occurs

        checkpoint = 'peiyi9979/math-shepherd-mistral-7b-prm'

        self.tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        # print(self.tokenizer)
        print(type(self.tokenizer))

        # Drop the first encoded id; what remains are the good/bad ids
        # ([648, 387] per the original note).
        good_and_bad = f"{self.good_token} {self.bad_token}"
        self.candidate_tokens = self.tokenizer.encode(good_and_bad)[1:]
        # The last encoded id is the step tag's id (12902 per the original note).
        self.step_tag_id = self.tokenizer.encode(self.step_tag)[-1]

        loaded = AutoModelForCausalLM.from_pretrained(
            checkpoint,
            quantization_config=quantization_config,
        )
        self.model = loaded.eval()
        if not quantization_config:
            self.model.to(self.device)

        self.aggregation = aggregation

    def __call_single(self, single_beam: str) -> float | list[float]:
        """Return the aggregated step scores for a single solution string.

        Raises:
            NotImplementedError: When ``self.aggregation`` is not supported.
        """
        token_ids = torch.tensor([self.tokenizer.encode(single_beam)]).to(self.device)

        with torch.no_grad():
            candidate_logits = self.model(token_ids).logits[:, :, self.candidate_tokens]
            # Index 0 along the last axis corresponds to the good token.
            good_prob = candidate_logits.softmax(dim=-1)[:, :, 0]
            at_step_tags = good_prob[token_ids == self.step_tag_id]

        step_probs = at_step_tags.tolist()

        # Map each supported aggregation name to its reduction.
        reducers = {
            "min": min,
            "max": max,
            "mean": statistics.mean,
            "prod": math.prod,
            "last": lambda probs: probs[-1],
            "full": lambda probs: probs,
        }
        if self.aggregation not in reducers:
            raise NotImplementedError
        return reducers[self.aggregation](step_probs)

    def __call__(self, steps: list[str]) -> list[StepScore]:
        """
        Args:
            steps (list[str]): Reasoning solutions, one string per solution.

        Returns:
            list[StepScore]: One ``StepScore`` per solution, in input order,
            holding the solution text and its aggregated score.
        """
        # Each entry is one chain-of-thought solution; with "full" aggregation
        # its score is the list of per-step scores.
        return [
            StepScore(step=solution, score=self.__call_single(solution))
            for solution in steps
        ]
    

# Build the Math-Shepherd reward model; "full" keeps every per-step score.
prm = Mistral7bPRM(aggregation="full")
    
