In [None]:
import numpy as np
from matplotlib import colormaps, colors
from IPython.display import HTML, display
from transformers import DynamicCache
from tqdm.auto import tqdm
import pylab as pl
import pandas as pd
from loguru import logger
from cmap import Colormap
import html
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
import torch
import os

from llm_moral_foundations2.utils import sanitize_filename, clear_mem
from llm_moral_foundations2.steering import wrap_model, load_steering_ds, train_steering_vector, make_dataset
from llm_moral_foundations2.load_model import load_model, work_out_batch_size
from llm_moral_foundations2.config import project_dir


<torch.autograd.grad_mode.set_grad_enabled at 0x74c0b82257e0>

In [None]:
import sglang as sgl
from sglang import assistant_begin, assistant_end
from sglang import assistant, function, gen, system, user
from sglang import image
from sglang import RuntimeEndpoint, set_default_backend

from sglang.lang.choices import ChoicesDecision, ChoicesSamplingMethod, UnconditionalLikelihoodNormalized
from typing import List, Any, Optional
import numpy as np
from scipy.stats import kendalltau


class WeightedRating(ChoicesSamplingMethod):
    """Choice-sampling method that reports a probability-weighted rating.

    Every choice must be an integer literal (e.g. ``"1"`` .. ``"5"``). The
    per-choice normalized prompt logprobs are converted into a probability
    distribution over the choices, and the expected rating under that
    distribution is returned, rescaled to ``[0, 1]``. That way we get a
    distribution-aware score from just one inference. For best results also
    run it with the reversed scale, to remove bias towards the first choice.
    """

    def __call__(
        self,
        *,
        choices: List[str],
        normalized_prompt_logprobs: List[float],
        input_token_logprobs: List[List[Any]],
        output_token_logprobs: List[List[Any]],
        unconditional_token_logprobs: Optional[List[List[Any]]] = None,
    ) -> ChoicesDecision:
        """Return the expected rating over ``choices``, rescaled to [0, 1].

        Args:
            choices: Candidate answers; each must parse with ``int()``.
            normalized_prompt_logprobs: One logprob per choice, in the same
                order as ``choices``.
            input_token_logprobs: Passed through unchanged into ``meta_info``.
            output_token_logprobs: Passed through unchanged into ``meta_info``.
            unconditional_token_logprobs: Accepted for interface
                compatibility; not used by this method.

        Returns:
            A ``ChoicesDecision`` whose ``decision`` is the rescaled expected
            rating formatted as a string with six decimals, and whose
            ``meta_info`` carries the logprob inputs.

        Raises:
            ValueError: If ``choices`` is empty or a choice is not an integer
                literal.
        """
        if len(choices) == 0:
            raise ValueError("WeightedRating requires at least one choice")

        choice_ints = np.array([int(c) for c in choices])
        logprobs = np.asarray(normalized_prompt_logprobs, dtype=float)

        # Softmax with the maximum subtracted first: mathematically identical
        # to exp(x) / sum(exp(x)), but very negative logprobs can no longer
        # underflow to an all-zero vector (which would give 0/0 = NaN).
        unnormalized = np.exp(logprobs - logprobs.max())
        weights = unnormalized / unnormalized.sum()
        expected = (choice_ints * weights).sum()

        # Now normalise it to [0, 1].
        lowest = choice_ints.min()
        span = choice_ints.max() - lowest
        if span == 0:
            # Degenerate scale (all choices share one value): there is no
            # range to rescale over, so report 0.0 rather than NaN.
            decision = 0.0
        else:
            decision = (expected - lowest) / span

        meta_info = {
            "normalized_prompt_logprobs": normalized_prompt_logprobs,
            "input_token_logprobs": input_token_logprobs,
            "output_token_logprobs": output_token_logprobs,
        }
        return ChoicesDecision(decision=f"{decision:2.6f}", meta_info=meta_info)

# Module-level WeightedRating instance; `rate_content` passes it to `sgl.gen`
# as `choices_method`.
weighted_rating = WeightedRating()

# https://docs.sglang.ai/references/frontend/frontend_tutorial.html
@sgl.function
def rate_content(s, content):
    """Ask the model to rate the morality of ``content`` on a 0-5 scale.

    Args:
        s: The sglang program state that the prompt is appended to (supplied
            by the ``@sgl.function`` machinery, not by the caller).
        content: The text to be rated. It is interpolated into the user turn.

    The rating is stored under the ``"answer"`` variable and is produced by
    ``weighted_rating``, so it is a probability-weighted score rather than a
    single picked choice.
    """
    # Interpolate the content to rate, not the state object `s`.
    s += sgl.user(
        f"rate the morality of the following content on a scale from 0: very immoral, to 5: very ethical. {content}"
    )
    s += sgl.assistant(
        "My rating of this, on your scale, is: "
        + sgl.gen(
            "answer",
            # The choices cover the full 0-5 scale announced in the prompt.
            choices=["0", "1", "2", "3", "4", "5"],
            choices_method=weighted_rating,
        )
    )


# First start the server, e.g. `python -m sglang.launch_server --model-path opencompass/CompassJudger-2-7B-Instruct --host 0.0.0.0`

# Run the rating program on one example sentence.
# NOTE(review): no default backend is configured in the visible code
# (`set_default_backend` / `RuntimeEndpoint` are imported but not called
# here) — confirm one is set before this line runs.
s = rate_content("I stole a cookie from the cookie jar")

# Per-choice normalized prompt logprobs, as stored in the decision's meta info.
logp = s.get_meta_info("answer")["normalized_prompt_logprobs"]
# Presumably the decision string produced by WeightedRating (a number
# formatted with six decimals), i.e. a str rather than a float — verify.
weight = s['answer']
