In [8]:
# Import all required libraries
import torch
from datasets import load_dataset
from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.model.general_recommender import BPR, Pop
from recbole.model.sequential_recommender import SASRec
from recbole.utils import init_seed, init_logger, get_trainer

# Set torch.load compatibility
torch.serialization.add_safe_globals([dict, list, tuple, set])

print("All libraries imported successfully!")

All libraries imported successfully!


In [9]:
# Load Amazon Reviews 2023 dataset using RecBole
print("=== Loading Amazon Reviews 2023 Dataset ===")

# Load raw dataset (optional, for comparison)
raw_dataset = load_dataset("McAuley-Lab/Amazon-Reviews-2023", "raw_review_All_Beauty", trust_remote_code=True)
print(f"Raw dataset loaded, samples: {len(raw_dataset['full'])}")

# 在RecBole中使用5-core过滤
print("=== Using 5-core Filtering in RecBole ===")

config_5core = Config(
    model='SASRec',
    dataset='All_Beauty', 
    config_dict={
        'data_path': 'seq_rec_results/dataset/processed/',
        'load_col': {
            'inter': ['user_id', 'item_id_list', 'item_id']
        },
        'benchmark_filename': ['train', 'valid', 'test'],
        'alias_of_item_id': ['item_id_list'],
        'train_neg_sample_args': None,
        'loss_type': 'CE',
    }
)

# 创建5-core过滤后的数据集
print("Creating 5-core filtered dataset...")
dataset_5core = create_dataset(config_5core)
train_data_5core, valid_data_5core, test_data_5core = data_preparation(config_5core, dataset_5core)

print(f"\n📊 5-core Filtered Dataset Statistics:")
print(f"Users: {dataset_5core.user_num}")
print(f"Items: {dataset_5core.item_num}")
print(f"Interactions: {dataset_5core.inter_num}")

=== Loading Amazon Reviews 2023 Dataset ===
Raw dataset loaded, samples: 701528
=== Using 5-core Filtering in RecBole ===
Creating 5-core filtered dataset...

📊 5-core Filtered Dataset Statistics:
Users: 254
Items: 357
Interactions: 2282


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[field].fillna(value="", inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[field].fillna(value="", inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always beha

In [10]:
# Define unified model training function
def train_model(model_type, dataset_name='All_Beauty', epochs=10, **kwargs):
    """
    Unified function to train recommendation models
    
    Args:
        model_type: Model type ('BPR', 'SASRec', 'Pop')
        dataset_name: Dataset name
        epochs: Training epochs
        **kwargs: Additional model-specific parameters
    
    Returns:
        dict: Dictionary containing model, trainer, config and results
    """
    
    print(f"\n=== Training {model_type} Model ===")
    
    # Base configuration
    base_config = {
        'data_path': 'seq_rec_results/dataset/processed/',
        'benchmark_filename': ['train', 'valid', 'test'],
        'epochs': epochs,
        'stopping_step': 10,
        'eval_step': 1,
        'metrics': ['Recall', 'NDCG'],
        'topk': [10, 20],
        'valid_metric': 'NDCG@10',
        'checkpoint_dir': './checkpoints/',
        'show_progress': True
    }
    
    # Model-specific configurations
    if model_type == 'BPR':
        model_class = BPR
        model_config = {
            **base_config,
            'load_col': {'inter': ['user_id', 'item_id']},
            'train_neg_sample_args': {
                'distribution': 'uniform',
                'sample_num': 1,
                'alpha': 1.0,
                'dynamic': False,
                'candidate_num': 0
            },
            'loss_type': 'BPR',
            'learning_rate': 0.001,
            'train_batch_size': 2048,
        }
        
    elif model_type == 'SASRec':
        model_class = SASRec
        model_config = {
            **base_config,
            'load_col': {'inter': ['user_id', 'item_id_list', 'item_id']},
            'alias_of_item_id': ['item_id_list'],
            'train_neg_sample_args': None,
            'loss_type': 'CE',
            'learning_rate': 0.001,
            'train_batch_size': 256,
            'max_seq_length': 50,
            'hidden_size': 64,
            'n_layers': 2,
            'n_heads': 2,
            'inner_size': 256,
            'hidden_dropout_prob': 0.5,
            'attn_dropout_prob': 0.5,
        }
        
    elif model_type == 'Pop':
        model_class = Pop
        model_config = {
            **base_config,
            'load_col': {'inter': ['user_id', 'item_id']},
            'train_neg_sample_args': None,
        }
    else:
        raise ValueError(f"Unsupported model type: {model_type}")
    
    # Merge user-defined parameters
    model_config.update(kwargs)
    
    # Create config and dataset
    config = Config(
        model=model_type,
        dataset=dataset_name,
        config_dict=model_config
    )
    
    # Create dataset
    model_dataset = create_dataset(config)
    train_data, valid_data, test_data = data_preparation(config, model_dataset)
    
    print(f"{model_type} dataset stats:")
    print(f"Users: {model_dataset.user_num}")
    print(f"Items: {model_dataset.item_num}")
    print(f"Interactions: {model_dataset.inter_num}")
    
    # Initialize model and trainer
    init_seed(config['seed'], config['reproducibility'])
    model = model_class(config, model_dataset).to(config['device'])
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)
    
    print(f"Training {model_type} model...")
    
    # torch.load compatibility settings
    original_load = torch.load
    def safe_load(*args, **kwargs):
        kwargs['weights_only'] = False
        return original_load(*args, **kwargs)
    torch.load = safe_load
    
    try:
        # Train model
        best_valid_score, best_valid_result = trainer.fit(
            train_data, valid_data, saved=True, show_progress=True
        )
        
        print(f"{model_type} training completed!")
        print(f"Best validation result: {best_valid_result}")
        
        # Test model
        test_result = trainer.evaluate(test_data, load_best_model=True, show_progress=True)
        print(f"{model_type} test result: {test_result}")
        
        return {
            'model_type': model_type,
            'model': model,
            'trainer': trainer,
            'config': config,
            'dataset': model_dataset,
            'train_data': train_data,
            'valid_data': valid_data,
            'test_data': test_data,
            'best_valid_result': best_valid_result,
            'test_result': test_result
        }
        
    finally:
        # Restore original torch.load function
        torch.load = original_load

print("Unified training function defined!")

Unified training function defined!


In [11]:
# Train all models using unified function
print("=== Training All Models with Unified Function ===")

# Store all model results
model_results = {}

# Train all three models
models_to_train = [
    {'model_type': 'Pop', 'epochs': 1},  # Pop model trains quickly
    {'model_type': 'BPR', 'epochs': 5},  # BPR model
    {'model_type': 'SASRec', 'epochs': 5}  # SASRec model
]

for model_config in models_to_train:
    try:
        result = train_model(**model_config)
        model_results[model_config['model_type']] = result
        print(f"✅ {model_config['model_type']} training successful")
    except Exception as e:
        print(f"❌ {model_config['model_type']} training failed: {str(e)}")
        model_results[model_config['model_type']] = None

print(f"\nTraining completed! Successfully trained {len([r for r in model_results.values() if r is not None])} models")

=== Training All Models with Unified Function ===

=== Training Pop Model ===
Pop dataset stats:
Users: 254
Items: 352
Interactions: 2282


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)


Training Pop model...


  scaler = amp.GradScaler(enabled=self.enable_scaler)
[1;35mTrain     0[0m: 100%|███████████████████████████| 1/1 [00:00<00:00, 17.91it/s, [1;33mGPU RAM: 0.00 G/79.14 G[0m][0m
[1;35mEvaluate   [0m: 100%|████████████████████████| 13/13 [00:00<00:00, 146.25it/s, [1;33mGPU RAM: 0.00 G/79.14 G[0m][0m


Pop training completed!
Best validation result: OrderedDict([('recall@10', 0.0), ('recall@20', 0.0124), ('ndcg@10', 0.0), ('ndcg@20', 0.0033)])


[1;35mEvaluate   [0m: 100%|██████████████████████████| 2/2 [00:00<00:00, 569.22it/s, [1;33mGPU RAM: 0.00 G/79.14 G[0m][0m
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)


Pop test result: OrderedDict([('recall@10', 0.0), ('recall@20', 0.0), ('ndcg@10', 0.0), ('ndcg@20', 0.0)])
✅ Pop training successful

=== Training BPR Model ===
BPR dataset stats:
Users: 254
Items: 352
Interactions: 2282
Training BPR model...


  scaler = amp.GradScaler(enabled=self.enable_scaler)
[1;35mTrain     0[0m: 100%|███████████████████████████| 1/1 [00:00<00:00,  6.27it/s, [1;33mGPU RAM: 0.00 G/79.14 G[0m][0m
[1;35mEvaluate   [0m: 100%|████████████████████████| 13/13 [00:00<00:00, 169.93it/s, [1;33mGPU RAM: 0.02 G/79.14 G[0m][0m
[1;35mTrain     1[0m: 100%|██████████████████████████| 1/1 [00:00<00:00, 135.82it/s, [1;33mGPU RAM: 0.02 G/79.14 G[0m][0m
[1;35mEvaluate   [0m: 100%|████████████████████████| 13/13 [00:00<00:00, 674.74it/s, [1;33mGPU RAM: 0.02 G/79.14 G[0m][0m
[1;35mTrain     2[0m: 100%|██████████████████████████| 1/1 [00:00<00:00, 141.88it/s, [1;33mGPU RAM: 0.02 G/79.14 G[0m][0m
[1;35mEvaluate   [0m: 100%|████████████████████████| 13/13 [00:00<00:00, 661.28it/s, [1;33mGPU RAM: 0.02 G/79.14 G[0m][0m
[1;35mTrain     3[0m: 100%|██████████████████████████| 1/1 [00:00<00:00, 145.66it/s, [1;33mGPU RAM: 0.02 G/79.14 G[0m][0m
[1;35mEvaluate   [0m: 100%|████████████████████████| 13

BPR training completed!
Best validation result: OrderedDict([('recall@10', 0.0068), ('recall@20', 0.0317), ('ndcg@10', 0.0039), ('ndcg@20', 0.011)])


[1;35mEvaluate   [0m: 100%|██████████████████████████| 2/2 [00:00<00:00, 576.22it/s, [1;33mGPU RAM: 0.02 G/79.14 G[0m][0m
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[field].fillna(value="", inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[field].fillna(value="", inplace=True)
The behavior will chang

BPR test result: OrderedDict([('recall@10', 0.0), ('recall@20', 0.1111), ('ndcg@10', 0.0), ('ndcg@20', 0.0319)])
✅ BPR training successful

=== Training SASRec Model ===
SASRec dataset stats:
Users: 254
Items: 357
Interactions: 2282
Training SASRec model...


  scaler = amp.GradScaler(enabled=self.enable_scaler)
[1;35mTrain     0[0m: 100%|███████████████████████████| 8/8 [00:00<00:00, 17.19it/s, [1;33mGPU RAM: 0.35 G/79.14 G[0m][0m
[1;35mEvaluate   [0m: 100%|██████████████████████████| 1/1 [00:00<00:00, 292.10it/s, [1;33mGPU RAM: 0.35 G/79.14 G[0m][0m
[1;35mTrain     1[0m: 100%|███████████████████████████| 8/8 [00:00<00:00, 24.35it/s, [1;33mGPU RAM: 0.35 G/79.14 G[0m][0m
[1;35mEvaluate   [0m: 100%|██████████████████████████| 1/1 [00:00<00:00, 307.79it/s, [1;33mGPU RAM: 0.35 G/79.14 G[0m][0m
[1;35mTrain     2[0m: 100%|███████████████████████████| 8/8 [00:00<00:00, 23.08it/s, [1;33mGPU RAM: 0.35 G/79.14 G[0m][0m
[1;35mEvaluate   [0m: 100%|██████████████████████████| 1/1 [00:00<00:00, 309.86it/s, [1;33mGPU RAM: 0.35 G/79.14 G[0m][0m
[1;35mTrain     3[0m: 100%|███████████████████████████| 8/8 [00:00<00:00, 22.93it/s, [1;33mGPU RAM: 0.35 G/79.14 G[0m][0m
[1;35mEvaluate   [0m: 100%|██████████████████████████| 

SASRec training completed!
Best validation result: OrderedDict([('recall@10', 0.1055), ('recall@20', 0.2109), ('ndcg@10', 0.0393), ('ndcg@20', 0.066)])


[1;35mEvaluate   [0m: 100%|██████████████████████████| 1/1 [00:00<00:00, 312.77it/s, [1;33mGPU RAM: 0.35 G/79.14 G[0m][0m

SASRec test result: OrderedDict([('recall@10', 0.0455), ('recall@20', 0.4091), ('ndcg@10', 0.0196), ('ndcg@20', 0.1104)])
✅ SASRec training successful

Training completed! Successfully trained 3 models





In [12]:
# Unified model performance comparison and analysis
def compare_models(model_results):
    """Compare all trained models"""
    
    print("\n" + "="*70)
    print("                Model Performance Comparison Report")
    print("="*70)
    
    # Extract test results
    results_summary = {}
    for model_type, result in model_results.items():
        if result is not None:
            results_summary[model_type] = result['test_result']
        else:
            results_summary[model_type] = {}
    
    # Create comparison table
    print(f"\n{'Metric':<15}", end="")
    model_names = list(results_summary.keys())
    for name in model_names:
        print(f" | {name:<12}", end="")
    print()
    print("-" * (15 + 15 * len(model_names)))
    
    metrics_to_compare = ['recall@10', 'ndcg@10', 'recall@20', 'ndcg@20']
    best_scores = {}
    
    for metric in metrics_to_compare:
        print(f"{metric:<15}", end="")
        metric_values = []
        
        for model_type in model_names:
            value = results_summary[model_type].get(metric, 'N/A')
            if value != 'N/A':
                print(f" | {value:<12.4f}", end="")
                metric_values.append((model_type, value))
            else:
                print(f" | {'N/A':<12}", end="")
        
        print()
        
        # Find best model
        if metric_values:
            best_model, best_score = max(metric_values, key=lambda x: x[1])
            best_scores[metric] = (best_model, best_score)
    
    print("-" * (15 + 15 * len(model_names)))
    
    # Analyze best models
    print(f"\n🏆 Best model for each metric:")
    for metric, (best_model, best_score) in best_scores.items():
        print(f"  {metric}: {best_model} ({best_score:.4f})")
    
    # Overall model ranking
    model_scores = {name: [] for name in model_names}
    for metric in metrics_to_compare:
        for model_type in model_names:
            value = results_summary[model_type].get(metric, 0)
            if value != 'N/A' and value != 0:
                model_scores[model_type].append(value)
    
    avg_scores = {name: sum(scores)/len(scores) if scores else 0 
                  for name, scores in model_scores.items()}
    
    print(f"\n📊 Overall model ranking:")
    sorted_models = sorted(avg_scores.items(), key=lambda x: x[1], reverse=True)
    for i, (model, score) in enumerate(sorted_models, 1):
        print(f"  {i}. {model}: {score:.4f} (avg score)")
    
    # Model characteristics analysis
    print(f"\n📝 Model characteristics:")
    model_analysis = {
        'Pop': 'Item popularity based, fast training, good for cold start',
        'BPR': 'Collaborative filtering, personalized, balanced performance',  
        'SASRec': 'Sequential recommendation, temporal patterns, rich historical data'
    }
    
    for model_type in model_names:
        if model_type in model_analysis:
            status = "✅ Success" if model_results[model_type] else "❌ Failed"
            print(f"  • {model_type}: {model_analysis[model_type]} [{status}]")
    
    return best_scores, sorted_models

# Execute model comparison
if model_results:
    best_scores, model_ranking = compare_models(model_results)
else:
    print("⚠️ No model results to compare, please run model training first")


                Model Performance Comparison Report

Metric          | Pop          | BPR          | SASRec      
------------------------------------------------------------
recall@10       | 0.0000       | 0.0000       | 0.0455      
ndcg@10         | 0.0000       | 0.0000       | 0.0196      
recall@20       | 0.0000       | 0.1111       | 0.4091      
ndcg@20         | 0.0000       | 0.0319       | 0.1104      
------------------------------------------------------------

🏆 Best model for each metric:
  recall@10: SASRec (0.0455)
  ndcg@10: SASRec (0.0196)
  recall@20: SASRec (0.4091)
  ndcg@20: SASRec (0.1104)

📊 Overall model ranking:
  1. SASRec: 0.1462 (avg score)
  2. BPR: 0.0715 (avg score)
  3. Pop: 0.0000 (avg score)

📝 Model characteristics:
  • Pop: Item popularity based, fast training, good for cold start [✅ Success]
  • BPR: Collaborative filtering, personalized, balanced performance [✅ Success]
  • SASRec: Sequential recommendation, temporal patterns, rich historical 

## Test

In [14]:
import json
meta_data = json.load(open('seq_rec_results/dataset/processed/All_Beauty/All_Beauty.data_maps'))

In [3]:
from datasets import load_dataset

datasets = load_dataset(
    "McAuley-Lab/Amazon-Reviews-2023",
    f"5core_timestamp_w_his_All_Beauty",
    trust_remote_code=True
)

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
from tqdm import tqdm
review_list = []
with open('seq_rec_results/dataset/processed/All_Beauty/All_Beauty.jsonl', 'r') as f:
    for line in tqdm(f):
        review_list.append(json.loads(line))


701528it [00:07, 94219.01it/s] 


In [15]:
user_set = set(meta_data['user2id'].keys())
item_set = set(meta_data['item2id'].keys())

In [16]:
review_list[0]

{'rating': 5.0,
 'title': 'Such a lovely scent but not overpowering.',
 'text': "This spray is really nice. It smells really good, goes on really fine, and does the trick. I will say it feels like you need a lot of it though to get the texture I want. I have a lot of hair, medium thickness. I am comparing to other brands with yucky chemicals so I'm gonna stick with this. Try it!",
 'images': [],
 'asin': 'B00YQ6X8EO',
 'parent_asin': 'B00YQ6X8EO',
 'user_id': 'AGKHLEW2SOWHNMFQIJGBECAF7INQ',
 'timestamp': 1588687728923,
 'helpful_vote': 0,
 'verified_purchase': True}

In [None]:
from collections import defaultdict

user2reviews = defaultdict(list)
for review in review_list:
    if review['user_id'] in user_set and review['asin'] in item_set:
        user2reviews[review['user_id']].append({
            'rating': review['rating'],
            'title': review['title'],
            'text': review['text'],
            'item_id': meta_data['item2id'][review['asin']],
            'timestamp': review['timestamp'],
            'helpful_vote': review['helpful_vote'],
            'verified_purchase': review['verified_purchase'],
        })

    

In [21]:
# save user2reviews
with open('seq_rec_results/dataset/processed/All_Beauty/All_Beauty.reviews', 'w') as f:
    json.dump(user2reviews, f)

In [4]:
import outlines
from transformers import AutoTokenizer, AutoModelForCausalLM


MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
model = outlines.from_transformers(
    AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto"),
    AutoTokenizer.from_pretrained(MODEL_NAME)
)

Loading checkpoint shards: 100%|██████████| 4/4 [00:12<00:00,  3.05s/it]


In [5]:
from pydantic import BaseModel
from enum import Enum

class Rating(Enum):
    poor = 1
    fair = 2
    good = 3
    excellent = 4

class ProductReview(BaseModel):
    rating: Rating
    pros: list[str]
    cons: list[str]
    summary: str

review = model(
    "Review: The XPS 13 has great battery life and a stunning display, but it runs hot and the webcam is poor quality.",
    ProductReview,
    max_new_tokens=200,
)

review = ProductReview.model_validate_json(review)
print(f"Rating: {review.rating.name}")  # "Rating: good"
print(f"Pros: {review.pros}")           # "Pros: ['great battery life', 'stunning display']"
print(f"Summary: {review.summary}")     # "Summary: Good laptop with great display but thermal issues"

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Rating: excellent
Pros: ['long battery life', 'stunning display', 'portable']
Summary: The Dell XPS 13 is a solid laptop that excels in battery life and display quality, but it has some drawbacks that prevent it from being a top contender in its class.


In [1]:
from enum import Enum
from pydantic import BaseModel
from vllm import LLM, SamplingParams
from vllm.sampling_params import GuidedDecodingParams

# 定义 JSON schema via Pydantic 模型
class Person(BaseModel):
    name: str
    age: int
    email: str

json_schema = Person.model_json_schema()

def main():
    llm = LLM(model="Qwen/Qwen2.5-3B-Instruct", max_model_len=100)

    # 使用 regex 强制输出形式，例如 email 格式
    guided_regex = r'"\s*email"\s*:\s*"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}"'
    guided_decoding_params_regex = GuidedDecodingParams(regex=guided_regex)
    sampling_params_regex = SamplingParams(
        guided_decoding=guided_decoding_params_regex,
        max_tokens=50
    )

    prompt_regex = (
        "Generate a JSON object with fields name, age, and email about a scientist:\n"
        "{\n"
        '  "name": "Ada Lovelace",\n'
        '  "age": 36,\n'
        '  "email": "ada.lovelace@example.com"\n'
        "}"
    )

    out_regex = llm.generate(prompts=prompt_regex, sampling_params=sampling_params_regex)
    print("Regex-constrained output:")
    print(out_regex[0].outputs[0].text)

    # 使用 JSON schema 强制输出整个结构符合 Person 模型
    guided_decoding_params_json = GuidedDecodingParams(json=json_schema)
    sampling_params_json = SamplingParams(
        guided_decoding=guided_decoding_params_json,
        max_tokens=100
    )

    prompt_json = (
        "Generate a JSON object about a historical scientist with name, age (integer), and email."
    )

    out_json = llm.generate(prompts=prompt_json, sampling_params=sampling_params_json)
    print("JSON-schema-constrained output:")
    print(out_json[0].outputs[0].text)


if __name__ == "__main__":
    main()

  from .autonotebook import tqdm as notebook_tqdm


INFO 09-16 21:20:04 [__init__.py:241] Automatically detected platform cuda.
INFO 09-16 21:20:05 [utils.py:326] non-default args: {'model': 'Qwen/Qwen2.5-3B-Instruct', 'max_model_len': 100, 'disable_log_stats': True}
INFO 09-16 21:20:15 [__init__.py:711] Resolved architecture: Qwen2ForCausalLM


`torch_dtype` is deprecated! Use `dtype` instead!


INFO 09-16 21:20:15 [__init__.py:1750] Using max model len 100


2025-09-16 21:20:15,896	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


INFO 09-16 21:20:16 [scheduler.py:222] Chunked prefill is enabled with max_num_batched_tokens=8192.
[1;36m(EngineCore_0 pid=1621821)[0;0m INFO 09-16 21:20:18 [core.py:636] Waiting for init message from front-end.
[1;36m(EngineCore_0 pid=1621821)[0;0m INFO 09-16 21:20:18 [core.py:74] Initializing a V1 LLM engine (v0.10.1.1) with config: model='Qwen/Qwen2.5-3B-Instruct', speculative_config=None, tokenizer='Qwen/Qwen2.5-3B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=100, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config

Loading safetensors checkpoint shards:   0% Completed | 0/2 [00:00<?, ?it/s]
Loading safetensors checkpoint shards:  50% Completed | 1/2 [00:00<00:00,  1.45it/s]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:01<00:00,  1.04s/it]
Loading safetensors checkpoint shards: 100% Completed | 2/2 [00:01<00:00,  1.01it/s]
[1;36m(EngineCore_0 pid=1621821)[0;0m 


[1;36m(EngineCore_0 pid=1621821)[0;0m INFO 09-16 21:20:58 [default_loader.py:262] Loading weights took 2.10 seconds
[1;36m(EngineCore_0 pid=1621821)[0;0m INFO 09-16 21:20:59 [gpu_model_runner.py:2007] Model loading took 5.7916 GiB and 35.112943 seconds
[1;36m(EngineCore_0 pid=1621821)[0;0m INFO 09-16 21:21:11 [backends.py:548] Using cache directory: /home/zzheng3/.cache/vllm/torch_compile_cache/0a9ab1607d/rank_0_0/backbone for vLLM's torch.compile
[1;36m(EngineCore_0 pid=1621821)[0;0m INFO 09-16 21:21:11 [backends.py:559] Dynamo bytecode transform time: 12.01 s
[1;36m(EngineCore_0 pid=1621821)[0;0m INFO 09-16 21:21:17 [backends.py:194] Cache the graph for dynamic shape for later use
[1;36m(EngineCore_0 pid=1621821)[0;0m INFO 09-16 21:21:54 [backends.py:215] Compiling a graph for dynamic shape takes 40.86 s
[1;36m(EngineCore_0 pid=1621821)[0;0m INFO 09-16 21:22:10 [monitor.py:34] torch.compile takes 52.87 s in total
[1;36m(EngineCore_0 pid=1621821)[0;0m INFO 09-16 21:22:

Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 100%|██████████| 67/67 [00:03<00:00, 18.75it/s]


[1;36m(EngineCore_0 pid=1621821)[0;0m INFO 09-16 21:22:16 [gpu_model_runner.py:2708] Graph capturing finished in 4 secs, took 0.57 GiB
[1;36m(EngineCore_0 pid=1621821)[0;0m INFO 09-16 21:22:16 [core.py:214] init engine (profile, create kv cache, warmup model) took 77.61 seconds
INFO 09-16 21:22:18 [llm.py:298] Supported_tasks: ['generate']


Adding requests: 100%|██████████| 1/1 [00:00<00:00, 37.90it/s]
Processed prompts: 100%|██████████| 1/1 [00:01<00:00,  1.96s/it, est. speed input: 24.55 toks/s, output: 25.57 toks/s]


Regex-constrained output:
"

 
   


Adding requests: 100%|██████████| 1/1 [00:00<00:00, 552.46it/s]
Processed prompts: 100%|██████████| 1/1 [00:00<00:00,  3.62it/s, est. speed input: 65.61 toks/s, output: 105.69 toks/s]


JSON-schema-constrained output:
{"name": "Isaac Newton", "age": 46, "email": "isaac.newton@cam.ac.uk"}
