<a href="https://colab.research.google.com/github/yuchenhe-xai/yccolab/blob/main/1206_local_quick_dataviewer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import json
import pandas as pd
from IPython.display import display, Markdown
from IPython.display import HTML
import requests

In [87]:
#@title Display a conversation for easy debugging (code by Grok)
import inspect

# sys.path.append(os.path.expanduser('~/xai/train/'))

# import tkinter as tk
from IPython.display import HTML, display


def display_conversation(conversation, conv_loss_mask=None, ratings=None):
    """
    Display a conversation in a notebook with different colors for the 'user' and 'assistant' roles,
    and show thumbs up or down based on ratings.

    Message content is HTML-escaped before rendering, so text containing characters such as
    '<', '>' or '&' is shown literally instead of being interpreted as markup.

    Parameters:
    - conversation (list of dict or str): List where each element can be a dictionary with 'role' and 'content' or just a string.
      Plain strings are assigned alternating roles starting with 'user'. Roles 'user' and 'human'
      (case-insensitive) are rendered as "User"; every other role is rendered as "Assistant".
    - conv_loss_mask (list of bool): List of boolean values indicating whether to highlight the message.
      May be shorter than the conversation; messages without a mask entry are not highlighted.
    - ratings (list of str): List of strings where 'LIKE' means thumbs up, 'DISLIKE' means thumbs down.
      May be shorter than the conversation; any other value shows no icon.
    """
    from html import escape  # function-scope import keeps this cell self-contained

    user_bg_color = "#d0e7ff"  # light blue for user messages
    assistant_bg_color = "#d4edda"  # light green for assistant messages
    user_text_color = "#084298"  # darker blue for user text
    assistant_text_color = "#155724"  # darker green for assistant text
    roles_default = ["user", "assistant"]
    rendered_messages = []

    for i, entry in enumerate(conversation):
        if isinstance(entry, str):
            role = roles_default[i % 2]
            content = entry
        else:
            role = entry["role"]
            content = entry["content"]
        is_user = role.lower() in ["user", "human"]

        # Set color based on role
        bg_color = user_bg_color if is_user else assistant_bg_color
        text_color = user_text_color if is_user else assistant_text_color
        role_display = "User" if is_user else "Assistant"
        rating_display = ""

        # Add rating icon if ratings are provided
        if ratings and i < len(ratings):
            rating = ratings[i]
            if rating == 'LIKE':
                rating_display = "&#x1F44D;"  # Thumbs up emoji
            elif rating == 'DISLIKE':
                rating_display = "&#x1F44E;"  # Thumbs down emoji

        # Highlight if conv_loss_mask is True for this message; the bounds check mirrors
        # the one used for ratings so a short mask no longer raises IndexError.
        is_highlighted = conv_loss_mask and i < len(conv_loss_mask) and conv_loss_mask[i]
        highlight_style = "font-weight: bold; text-decoration: underline;" if is_highlighted else "font-weight: normal;"

        # Escape the message text so raw '<', '>' and '&' cannot break the surrounding markup.
        safe_content = escape(str(content))

        # Collect each entry as an HTML fragment with styling and rating icon
        rendered_messages.append(f"""
        <div style='background-color: {bg_color}; color: {text_color};
                    border-radius: 8px; padding: 10px; margin: 8px 0;'>
            <strong style='{highlight_style}'>{role_display}:</strong> {rating_display}<br>
            <span style='{highlight_style}'>{safe_content}</span>
        </div>
        """)

    display(HTML("".join(rendered_messages)))


In [91]:
#@title Display rubrics + model comparison (code by Grok)

from IPython.display import display, HTML
import markdown
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter
from IPython.core.display import display, HTML
from IPython.display import Javascript
# Ask the Colab frontend to let this cell's output grow up to 5000px tall.
# The `google.colab` JS object only exists in a Colab frontend, so the call is guarded
# to avoid a "google is not defined" JavaScript error in other Jupyter frontends.
display(Javascript('''
if (typeof google !== "undefined" && google.colab) {
  google.colab.output.setIframeHeight(0, true, {maxHeight: 5000});
}
'''))
# Render a code snippet as syntax-highlighted HTML
def syntax_highlight(code, language='python'):
    """
    Return `code` highlighted by Pygments as an HTML string.

    Parameters:
    - code (str): Source text to highlight.
    - language (str): Pygments lexer alias used to look up the lexer (default 'python').

    The lexer is created with stripall=True and the HTML formatter uses the
    'monokai' style with noclasses=True.
    """
    return highlight(
        code,
        get_lexer_by_name(language, stripall=True),
        HtmlFormatter(style='monokai', noclasses=True),
    )

# Convert Markdown text into an HTML fragment
def markdown_to_html(markdown_text):
    """
    Convert a Markdown string to HTML via `markdown.markdown`.

    The 'fenced_code' and 'codehilite' extensions are enabled for the conversion.
    """
    enabled_extensions = ['fenced_code', 'codehilite']
    return markdown.markdown(markdown_text, extensions=enabled_extensions)


def display_rubrics_ratings(data):
  """
  Render a prompt, two model responses side by side, and the per-criteria rubric labels.

  Parameters:
  - data (dict): Reads the keys
      'prompt'            - Markdown text shown at the top,
      'rubrics'           - list of dicts, each with 'criteria', 'label', 'response'
                            and 'metadata' -> 'model',
      'ranked_model_list' - list of model names; only element 0 is inspected,
      'point_wise_scores' - indexable pair of scores.

  Layout:
  - Within each criterion the model whose name contains 'grok' is placed in the left
    column when it was the first model seen for that criterion; otherwise the two
    entries are swapped.
  - The response panels are colored by score (1 = red ... 7 = green); any score that
    is not one of the integer keys 1..7 falls back to a neutral gray instead of
    raising KeyError.
  - Criteria are listed so that those where the left model is 'yes' and the right
    model is not come first.

  Criteria text and model names are HTML-escaped before being inserted into the page.

  NOTE(review): the code assumes every criterion has rubrics for exactly two models and
  that 'point_wise_scores' is ordered like 'ranked_model_list' - confirm against the
  data producer. An empty or single-model rubric list raises IndexError.
  """
  from html import escape  # function-scope import keeps this cell self-contained

  prompt = data['prompt']
  rubrics = data['rubrics']
  html_content = ""

  # criteria -> {model name: rubric index} and criteria -> {model name: 1 if label is "yes" else 0}
  criteria_mapping = {}
  criteria_rating = {}
  for i, rubric in enumerate(rubrics):
    criteria = rubric['criteria']
    model = rubric['metadata']['model']
    criteria_mapping.setdefault(criteria, {})[model] = i
    criteria_rating.setdefault(criteria, {})[model] = int("yes" == rubric['label'])

  # Flatten each per-model dict into a two-element list, swapping the pair when the
  # first-seen model is not the 'grok' one so that column order is consistent.
  model_order = []
  for criteria in criteria_mapping:
    first_model = next(iter(criteria_mapping[criteria]))
    swap = 'grok' not in first_model
    indices = list(criteria_mapping[criteria].values())
    ratings = list(criteria_rating[criteria].values())
    criteria_mapping[criteria] = indices[::-1] if swap else indices
    criteria_rating[criteria] = ratings[::-1] if swap else ratings
    # The last criterion processed decides which rubric entries supply the responses.
    model_order = criteria_mapping[criteria]

  grok = int('grok' in data['ranked_model_list'][0])
  model1 = rubrics[model_order[0]]['metadata']['model']
  response1 = rubrics[model_order[0]]['response']
  model2 = rubrics[model_order[1]]['metadata']['model']
  response2 = rubrics[model_order[1]]['response']
  score1 = data['point_wise_scores'][1-grok]
  score2 = data['point_wise_scores'][grok]
  bg_colors = {
      1: "rgb(255, 0, 0)",   # Red
      2: "rgb(255, 64, 0)",
      3: "rgb(255, 128, 0)",
      4: "rgb(192, 255, 0)",
      5: "rgb(128, 255, 0)",
      6: "rgb(64, 255, 0)",
      7: "rgb(0, 255, 0)"    # Green
  }
  # Neutral gray for scores outside the 1..7 scale (e.g. fractional or missing-scale values).
  fallback_bg_color = "rgb(128, 128, 128)"
  bg_color1 = bg_colors.get(score1, fallback_bg_color)
  bg_color2 = bg_colors.get(score2, fallback_bg_color)

  html_content += f"""
      <div style="padding: 10px; border: 1px solid #808080; background-color: #222222;">
          {markdown_to_html(prompt)}
      </div>
  """
  html_content += f"""
      <div style="margin-bottom: 10px; color: #000000;">
          <div style="display: flex;">
              <div style="flex: 1; margin-right: 10px;">
                  <div style="padding: 10px; border: 1px solid #808080; background-color: {bg_color1}; opacity: 0.7;">
                      <strong>Model:</strong> <em>{escape(str(model1))}</em><br>
                      <strong>Score:</strong> <strong>{score1}</strong><br>
                      <strong>Response:</strong><br>
                      {markdown_to_html(response1)}
                  </div>
              </div>
              <div style="flex: 1;">
                  <div style="padding: 10px; border: 1px solid #808080; background-color: {bg_color2}; opacity: 0.7;">
                      <strong>Model:</strong> <em>{escape(str(model2))}</em><br>
                      <strong>Score:</strong> <strong>{score2}</strong><br>
                      <strong>Response:</strong><br>
                      {markdown_to_html(response2)}
                  </div>
              </div>
          </div>
      </div>
  """

  # Sort by (right rating - left rating): criteria the left model passes and the right
  # model fails come first; the sort is stable so ties keep their original order.
  criteria_rating = dict(sorted(criteria_rating.items(), key=lambda x: - x[1][0] + x[1][1]))
  for criteria in criteria_rating:
      indices = criteria_mapping[criteria]
      i = indices[0]
      j = indices[1]
      label1 = rubrics[i]['label']
      label2 = rubrics[j]['label']
      model1 = rubrics[i]['metadata']['model']
      model2 = rubrics[j]['metadata']['model']
      # Color coding for labels and background color for models/responses in dark mode
      color1 = '#00FF00' if label1 == 'yes' else '#FF0000'
      color2 = '#00FF00' if label2 == 'yes' else '#FF0000'
      bg_color1 = '#003300' if label1 == 'yes' else '#440000'
      bg_color2 = '#003300' if label2 == 'yes' else '#440000'

      # Construct HTML for each pair
      html_content += f"""
      <div style="margin-bottom: 10px; color: #FFFFFF;">
          <h4>{escape(str(criteria))}</h4>
          <div style="display: flex;">
              <div style="flex: 1; margin-right: 10px;">
                  <div style="padding: 10px; border: 1px solid #808080; background-color: {bg_color1}; text-align: center;">
                      <strong style="color: {color1};">{escape(model1.upper())}</strong>
                  </div>
              </div>
              <div style="flex: 1;">
                  <div style="padding: 10px; border: 1px solid #808080; background-color: {bg_color2}; text-align: center;">
                      <strong style="color: {color2};">{escape(model2.upper())}</strong>
                  </div>
              </div>
          </div>
      </div>
      """

  # Display the HTML in the IPython notebook
  display(HTML(html_content))

# display_rubrics_ratings(each_data)

  from IPython.core.display import display, HTML


<IPython.core.display.Javascript object>

In [92]:
# Load the examples to browse. The context manager closes the file handle as soon as
# the JSON has been parsed instead of leaving it open until garbage collection.
with open("/Users/yuchen/cbv-lmsys-disagree.json", "r") as cbv_file:
    all_cbv_disagree = json.load(cbv_file)
# Cursor into all_cbv_disagree used when stepping through examples.
i = 0

In [100]:
# Advance the cursor by one and render that example; re-running this cell steps
# through all_cbv_disagree one entry per run (the cell is intentionally stateful).
# NOTE(review): `i` is incremented before indexing, so if the cursor starts at 0 the
# entry at index 0 is never shown, and (presumably all_cbv_disagree is a list) the
# lookup fails once `i` passes the last entry - confirm this is intended.
i += 1
print(i)
each_data = all_cbv_disagree[i]
display_rubrics_ratings(each_data)

8
