In [44]:
import re
import os
import json
import string
from tqdm import tqdm
import torch
import numpy as np
from openai import OpenAI
from collections import Counter
import backoff
from transformers import AutoTokenizer

In [20]:
# --- Serving configuration -------------------------------------------------
# The model is reached through an OpenAI-style HTTP API on localhost.
port = 8001
model_name = 'Qwen/QwQ-32B'
# 24 * 60 * 60 = 86400; passed as the client/request timeout
# (presumably seconds -- confirm against the openai client docs).
OPENAI_REQUEST_TIMEOUT = 24 * 60 * 60

# Tokenizer is loaded by model name; it is used later to render chat templates.
tokenizer = AutoTokenizer.from_pretrained(model_name)

client = OpenAI(
    base_url=f"http://localhost:{port}/v1",
    api_key="EMPTY",
    timeout=OPENAI_REQUEST_TIMEOUT,
)
# Sanity check: list the models the endpoint reports.
print(client.models.list())

# NOTE: the retry decorator is intentionally left disabled for this wrapper.
# @backoff.on_exception(backoff.constant, Exception, interval=5)
def run_chat_completion_with_backoff(client, **kwargs):
    """Forward ``kwargs`` to ``client.chat.completions.create`` and return its result.

    Despite the name, no retry is applied while the decorator above stays
    commented out; any exception from the call propagates to the caller.
    """
    create_chat_completion = client.chat.completions.create
    return create_chat_completion(**kwargs)

@backoff.on_exception(backoff.constant, Exception, interval=5)
def run_generate_with_backoff(client, **kwargs):
    """Forward ``kwargs`` to ``client.completions.create`` and return its result.

    The decorator retries on any ``Exception`` using ``backoff.constant`` with
    ``interval=5``. No ``max_tries``/``max_time`` is passed here, so retries are
    presumably unbounded -- confirm against the backoff documentation.
    """
    completions_api = client.completions
    return completions_api.create(**kwargs)

def segment_thoughts_v1(x):
    """Split a thinking trace into paragraphs.

    Leading/trailing whitespace is stripped from ``x`` first, then the text is
    cut at every blank-line separator (``'\\n\\n'``). An empty input yields
    ``['']``.
    """
    trimmed = x.strip()
    paragraphs = trimmed.split('\n\n')
    return paragraphs

def segment_thoughts_v2(x):
    """Split a thinking trace into coarse "thought" segments.

    The text is stripped and cut into paragraphs at ``'\\n\\n'`` (the same split
    ``segment_thoughts_v1`` performs). A paragraph opens a new segment when it
    begins, case-insensitively, with one of the reasoning markers below;
    otherwise it is appended to the previous segment, re-joined with
    ``'\\n\\n'``. The first paragraph always opens a segment.

    A marker only counts when it ends on a word boundary, i.e. it is followed
    by the end of the paragraph or by a character that is neither alphanumeric
    nor ``'_'``. This stops 'oh' from firing on "Ohm's law", 'now' on
    "Nowhere", or 'maybe I' on "Maybe it ...".

    Args:
        x: the raw thinking text.

    Returns:
        A list of segment strings; ``['']`` for empty/whitespace-only input.
    """
    # note: excluding things like "so" "therefore", "but", "let me"
    reasoning_word_list = [
        'okay', 'hmm', 'wait', 'but wait', 'oh wait', 'no wait', 'no, wait', 'but let me', 'but actually', 'alternatively',
        'now', 'the question', 'ah', 'oh', 'next', 'another angle', 'another approach', 'also', 'hold on', 'looking it up',
        'another point', 'I don\'t think', 'perhaps I', 'putting this together', 'Putting it all together', 'i\'m', 'but i\'m',
        'let me think again', 'I don\'t see', 'maybe I', 'alternative', "I wonder if", "another way", 'an alternative',
    ]
    # Lower-case the markers once instead of once per paragraph.
    markers = tuple(word.lower() for word in reasoning_word_list)

    def starts_with_marker(paragraph):
        """Return True if ``paragraph`` begins with a marker at a word boundary."""
        lowered = paragraph.lower()
        for marker in markers:
            if not lowered.startswith(marker):
                continue
            # Character right after the marker ('' when the paragraph ends there).
            following = lowered[len(marker):len(marker) + 1]
            if not following or not (following.isalnum() or following == '_'):
                return True
        return False

    final_thoughts = []
    for paragraph in x.strip().split('\n\n'):
        if not final_thoughts or starts_with_marker(paragraph):
            final_thoughts.append(paragraph)
        else:
            final_thoughts[-1] += '\n\n' + paragraph
    return final_thoughts


SyncPage[Model](data=[Model(id='Qwen/QwQ-32B', created=1752252216, object='model', owned_by='vllm', root='Qwen/QwQ-32B', parent=None, max_model_len=40960, permission=[{'id': 'modelperm-5d6849a416c34f3a92652630642b03b3', 'object': 'model_permission', 'created': 1752252216, 'allow_create_engine': False, 'allow_sampling': True, 'allow_logprobs': True, 'allow_search_indices': False, 'allow_view': True, 'allow_fine_tuning': False, 'organization': '*', 'group': None, 'is_blocking': False}])], object='list')


In [85]:
domain = 'science'
data_dir = '/fsx-comem/diwu0162/OpenThoughts3/data/'
# os.path.join avoids the doubled slash that f'{data_dir}/...' produced.
data_file = os.path.join(data_dir, f'train_sample_{domain}_1k.jsonl')

# Read the JSONL sample with a context manager so the file handle is closed,
# and skip blank lines (e.g. a trailing newline) that json.loads would reject.
with open(data_file, encoding='utf-8') as data_fh:
    domain_data = [json.loads(line) for line in data_fh if line.strip()]

# Split entries by whether the second conversation turn contains a closing
# '</think>' tag, i.e. whether the recorded thinking was finished.
domain_data_finished_thinking_only = []
domain_data_unfinished_thinking_only = []
for x in domain_data:
    if '</think>' in x['conversations'][1]['value']:
        domain_data_finished_thinking_only.append(x)
    else:
        domain_data_unfinished_thinking_only.append(x)

In [None]:
def generate_specific_hint(question, teacher_answer, teacher_thought_str):
    """Ask the served model to write a reusable hint for problems like ``question``.

    A tutoring prompt is built from the question, the teacher's final answer and
    the teacher's thinking, rendered through the tokenizer's chat template, and
    sent to the completions endpoint. The raw completion is then split into the
    model's own thinking (text before ``</think>``), the ``### Applicable
    Problems`` section and the ``### Hint`` section.

    Args:
        question: the problem statement shown to the model.
        teacher_answer: the teacher's final answer, inserted under
            "### Teacher's Answer".
        teacher_thought_str: the teacher's thinking trace, inserted under
            "### Teacher's Thinking".

    Returns:
        A tuple ``(hint_text_raw, hint_result_dict)`` where ``hint_text_raw`` is
        the unprocessed completion text and ``hint_result_dict`` has the keys
        ``'hint_derivation_thinking'``, ``'hint'`` and ``'applicable_problems'``
        (the latter is ``''`` when no ``### Hint`` header is found).
    """
    # assert teacher_answer != ""
    # The answer slot must use the ``teacher_answer`` parameter; it previously
    # read a notebook-level ``answer`` variable and ignored the argument.
    prompt = (f"You are an expert tutor. Given a question, a final answer written by the teacher, and a long thinking process written by a teacher, "
              f"write a brief hint that can help yourself approach similar questions without revealing the answer or any intermediate results. "
              f"The hint should outline the steps to solve these general questions and the general strategy. "
              f"The hint should also highlight key points in thinking to expedite problem solving and avoid common traps. "
              f"Utilize and pay special attention to the places where the teacher also gets confused or spends too much time. "
              f"Start by outlining the general problem under a section ### Applicable Problems . Then, start your hint on a new line by ### Hint ."
              f"Inside the hint, you must first re-state the general problem setting that the hint can apply to. "
              f"Then, use a first person perspective just like you are the student, e.g., say something like 'For problems like X, I should...'."
              f"\n\n\n### Question:\n{question}\n\n\n### Teacher's Answer:\n{teacher_answer}\n\n\n### Teacher's Thinking:\n{teacher_thought_str}"
              f"\n\n\nNow, analyze the question, answer, and teacher's thought and write your hint. Make sure your hint helps solving"
              f" approach similar questions without revealing the answer or any intermediate results. ")
    prompt_formatted = tokenizer.apply_chat_template([{"role": "user", "content": prompt}], tokenize=False, add_generation_prompt=True)

    response = client.completions.create(model=model_name, prompt=prompt_formatted, n=1, temperature=0.7, top_p=0.8,
                                          max_tokens=10000, timeout=OPENAI_REQUEST_TIMEOUT,
                                          extra_body={'top_k': 20, 'include_stop_str_in_output': True, 'repetition_penalty': 1.05,})
    hint_text_raw = response.choices[0].text

    # Separate the model's own reasoning from its final output. partition()
    # cuts at the first '</think>' only, so nothing after a later occurrence
    # of the tag is dropped.
    before_tag, think_tag, after_tag = hint_text_raw.partition('</think>')
    if think_tag:
        hint_derivation_thinking = before_tag.strip()
        hint_text = after_tag.strip()
    else:
        hint_derivation_thinking = ''
        hint_text = hint_text_raw.strip()

    # further process hint_text
    hint_result_dict = {'hint_derivation_thinking': hint_derivation_thinking}
    if '### Hint' in hint_text:
        hint_sections = hint_text.split('### Hint')
        # Text after the last '### Hint' header is the hint itself; text before
        # the first header holds the applicable-problems section.
        hint_result_dict['hint'] = hint_sections[-1].strip()
        applicable_problems = hint_sections[0].strip()
        hint_result_dict['applicable_problems'] = applicable_problems.replace('### Applicable Problems', '').strip()
    else:
        hint_result_dict['hint'] = hint_text
        hint_result_dict['applicable_problems'] = ''

    return hint_text_raw, hint_result_dict
    

# Pick one sample entry and pull out its question, thinking and final answer.
idx = 40
entry = domain_data_finished_thinking_only[idx]
# entry = domain_data_unfinished_thinking_only[idx]
question = entry['conversations'][0]['value']
assistant_turn = entry['conversations'][1]['value']
think_close = '</think>'
if think_close not in assistant_turn:
    # Thinking never closed: the whole turn is thinking and there is no answer.
    thoughts = assistant_turn.replace('<think>', '').strip()
    answer = 'answer unknown due to thinking unfinished'
else:
    # Thinking is everything before the first closing tag; the answer is
    # whatever follows the last one.
    thoughts = assistant_turn.split(think_close, 1)[0].replace('<think>', '').strip()
    answer = assistant_turn.rsplit(think_close, 1)[-1].strip()


hint_text_raw, problem_specific_hint = generate_specific_hint(question, answer, thoughts)

# some visualization
separator = '\n\n=================================='
print('Question:', question)
print(separator)
print('QwQ solution after thinking:\n\n', answer, sep='')
print(separator)
print("QwQ's hint after observing question + thinking + solution:\n\n")
print(hint_text_raw)
print(separator)
print(json.dumps(problem_specific_hint, indent=4))

Question: For a charged particle in a magnetic field the equation of motion is
$$\frac {d \vec v}{dt}=\frac q m \left[\frac {\vec v} c \times \vec B(\vec x)\right]$$
Then, if $\vec B$ is a slowly changing field the motion $\vec v(t)=v_0 \frac {\vec B(\vec x)}{B(\vec x)}$, that represent a particle following the magnetic field line, is a good approximated solution of the problem (I know it represents a particular case but it's the one I'm interested in).
Even if I understand that this is a good approximation, I don't know which method should be used in order to derive this approximated solution from the original problem
I tired zero order Taylor expansion, but the result is different, indeed expanding the field around $\vec x_0$ and considering the zero order term:
$$\vec B(\vec x)=\vec B(\vec x_0)$$
So the equation of motion becomes
$$\frac {d \vec v}{dt}=\frac q m \left[\frac {\vec v} c \times \vec B(\vec x_0)\right]$$
A solution of this equation is $\vec v=v_0 \frac {\vec B(\vec x_0)