In [1]:
# Import all required libraries
import torch
from datasets import load_dataset
from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.model.general_recommender import BPR, Pop, ItemKNN, LightGCN
from recbole.model.sequential_recommender import SASRec
from recbole.utils import init_seed, init_logger, get_trainer

# Set torch.load compatibility
torch.serialization.add_safe_globals([dict, list, tuple, set])

print("All libraries imported successfully!")

  from .autonotebook import tqdm as notebook_tqdm


All libraries imported successfully!


## Recaller Training

In [None]:
# Load Amazon Reviews 2023 dataset using RecBole
from GRPO.data import get_base_config_dict
import os
for dataset_name in os.listdir('dataset'):
    try:
        config_5core = Config(
            model='SASRec',
            dataset=dataset_name, 
            config_dict=get_base_config_dict(dataset_name)
        )

        # ÂàõÂª∫5-coreËøáÊª§ÂêéÁöÑÊï∞ÊçÆÈõÜ
        print(f"=== Loading {dataset_name} ===")
        print("Creating 5-core filtered dataset...")
        from recbole.utils import init_seed as recbole_init_seed
        recbole_init_seed(seed=42, reproducibility=True)
        dataset_5core = create_dataset(config_5core)
        train_data_5core, valid_data_5core, test_data_5core = data_preparation(config_5core, dataset_5core)
        print(f"\nüìä 5-core Filtered Dataset Statistics:")
        print(dataset_5core)
        # train set stats
        import numpy as np
        print(train_data_5core.dataset)
        print(np.unique(train_data_5core.dataset.inter_feat['user_id'].numpy()).shape)
        print(valid_data_5core.dataset)
        print(np.unique(valid_data_5core.dataset.inter_feat['user_id'].numpy()).shape)
        print(test_data_5core.dataset)
        print(np.unique(test_data_5core.dataset.inter_feat['user_id'].numpy()).shape)
    except Exception as e:
        print(f"Error loading {dataset_name}: {e}")
        continue



In [None]:
# Define unified model training function
def train_model(model_type, dataset_name='All_Beauty', epochs=10, **kwargs):
    """
    Unified function to train recommendation models
    
    Args:
        model_type: Model type ('BPR', 'SASRec', 'Pop')
        dataset_name: Dataset name
        epochs: Training epochs¬∑
        **kwargs: Additional model-specific parameters
    
    Returns:
        dict: Dictionary containing model, trainer, config and results
    """
    
    print(f"\n=== Training {model_type} Model ===")
    
    # Base configuration
    base_config = {
        # 'data_path': 'seq_rec_results/dataset/processed/',
        # 'benchmark_filename': ['train', 'valid', 'test'],
        'epochs': epochs,
        'stopping_step': 10,
        'eval_step': 1,
        'metrics': ['Recall', 'NDCG'],
        'topk': [10, 50],
        'valid_metric': 'NDCG@10',
        'checkpoint_dir': './checkpoints/',
        'show_progress': True,
        'save_dataset': True,
        'save_dataloaders': True,
    }
    base_config.update({
        'data_path': 'dataset',
        'load_col': {
            # 'inter': ['user_id', 'item_id', 'rating', 'timestamp'],
            'inter': ['user_id', 'item_id', 'timestamp']
        },
        'user_inter_num_interval': "[5,inf)",
        'item_inter_num_interval': "[5,inf)",
        'train_neg_sample_args': None,
        'loss_type': 'CE',
        # 'val_interval': {
        #     'rating': '[3,inf)'  # Âè™‰øùÁïôrating >= 4ÁöÑ‰∫§‰∫í
        # },
        'eval_args': {
            'split': {'LS': 'valid_and_test'},  # Leave-One-Out
            'order': 'TO',  # Temporal Order
            'group_by': 'user'
        },
        'ITEM_ID_FIELD': 'item_id',
    })
    
    # Model-specific configurations
    if model_type == 'BPR':
        model_class = BPR
        model_config = {
            **base_config,
            'train_neg_sample_args': {
                'distribution': 'uniform',
                'sample_num': 1,
                'alpha': 1.0,
                'dynamic': False,
                'candidate_num': 0
            },
            'loss_type': 'BPR',
            'learning_rate': 0.001,
            'train_batch_size': 2048,
            'eval_batch_size': 2048 * 20000,
        }
        
    elif model_type == 'SASRec':
        model_class = SASRec
        model_config = {
            **base_config,
            'train_neg_sample_args': None,
            'loss_type': 'CE',
            'learning_rate': 0.001,
            'train_batch_size': 256,
            'max_seq_length': 50,
            'hidden_size': 64,
            'n_layers': 2,
            'n_heads': 2,
            'inner_size': 256,
            'hidden_dropout_prob': 0.5,
            'attn_dropout_prob': 0.5,
        }
        
    elif model_type == 'Pop':
        model_class = Pop
        model_config = {
            **base_config,
            'train_neg_sample_args': None,
        }
    elif model_type == 'ItemKNN':
        model_class = ItemKNN
        model_config = {
            **base_config,
            'train_neg_sample_args': None,
            'eval_batch_size': 2048 * 20000,
        }
    elif model_type == 'LightGCN':
        model_class = LightGCN
        model_config = {
            **base_config,
            # LightGCNÈúÄË¶ÅË¥üÈááÊ†∑
            'train_neg_sample_args': {
                'distribution': 'uniform',
                'sample_num': 1,  # ÊØè‰∏™Ê≠£Ê†∑Êú¨ÈÖç1‰∏™Ë¥üÊ†∑Êú¨
            },
            'loss_type': 'BPR',
            'embedding_size': 64,
            'n_layers': 3,  # GCNÂ±ÇÊï∞
            'reg_weight': 1e-5,  # Ê≠£ÂàôÂåñÁ≥ªÊï∞
            'learning_rate': 0.001,
            'train_batch_size': 2048,
            'eval_batch_size': 2048 * 20000,
        }
    elif model_type == 'SimpleX':
        from recbole.model.general_recommender import SimpleX
        model_class = SimpleX
        model_config = {
            **base_config,
            'train_neg_sample_args': {
                'distribution': 'uniform',
                'sample_num': 1,
            },
            'loss_type': 'BPR',
            'embedding_size': 64,
            'aggregator': 'mean',  # Êàñ 'user_attention', 'self_attention'
            'gamma': 0.5,
            'margin': 0.9,
            'negative_weight': 0.5,
            'reg_weight': 1e-5,
            'learning_rate': 0.001,
            'train_batch_size': 2048,
            'eval_batch_size': 2048 * 20000,
        }
    else:
        raise ValueError(f"Unsupported model type: {model_type}")
    
    # Merge user-defined parameters
    model_config.update(kwargs)
    
    # Create config and dataset
    config = Config(
        model=model_type,
        dataset=dataset_name,
        config_dict=model_config
    )
    config['dataset_save_path'] = f'{config["checkpoint_dir"]}/'
    # Create dataset
    model_dataset = create_dataset(config)
    train_data, valid_data, test_data = data_preparation(config, model_dataset)
    
    print(f"{model_type} dataset stats:")
    print(f"Users: {model_dataset.user_num}")
    print(f"Items: {model_dataset.item_num}")
    print(f"Interactions: {model_dataset.inter_num}")
    
    # Initialize model and trainer
    init_seed(config['seed'], config['reproducibility'])
    model = model_class(config, model_dataset).to(config['device'])
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)
    
    print(f"Training {model_type} model...")
    
    # torch.load compatibility settings
    original_load = torch.load
    def safe_load(*args, **kwargs):
        kwargs['weights_only'] = False
        return original_load(*args, **kwargs)
    torch.load = safe_load
    
    try:
        # Train model
        best_valid_score, best_valid_result = trainer.fit(
            train_data, valid_data, saved=True, show_progress=True
        )
        
        print(f"{model_type} training completed!")
        print(f"Best validation result: {best_valid_result}")
        
        # Test model
        test_result = trainer.evaluate(test_data, load_best_model=True, show_progress=True)
        print(f"{model_type} test result: {test_result}")
        
        return {
            'model_type': model_type,
            'model': model,
            'trainer': trainer,
            'config': config,
            'dataset': model_dataset,
            'train_data': train_data,
            'valid_data': valid_data,
            'test_data': test_data,
            'best_valid_result': best_valid_result,
            'test_result': test_result
        }
        
    finally:
        # Restore original torch.load function
        torch.load = original_load

print("Unified training function defined!")

Unified training function defined!


In [None]:
# Train all models using unified function
print("=== Training All Models with Unified Function ===")

# Store all model results
model_results = {}
dataset_name = "ml-1m"
# Train all three models
models_to_train = [
    {'model_type': 'BPR', 'epochs': 100},
    {'model_type': 'LightGCN', 'epochs': 100},
    {'model_type': 'SimpleX', 'epochs': 100},
    {'model_type': 'SASRec', 'epochs': 100},
]

for model_config in models_to_train:
        result = train_model(
            dataset_name=dataset_name,
            model_type=model_config['model_type'],
            epochs=model_config['epochs']
        )
        model_results[model_config['model_type']] = result
        print(f"‚úÖ {model_config['model_type']} training successful")

print(f"\nTraining completed! Successfully trained {len([r for r in model_results.values() if r is not None])} models")

=== Training All Models with Unified Function ===

=== Training BPR Model ===


BPR dataset stats:
Users: 6041
Items: 3417
Interactions: 999611
Training BPR model...


  scaler = amp.GradScaler(enabled=self.enable_scaler)
[1;35mTrain     0[0m:   0%|                         | 2/483 [00:00<02:34,  3.12it/s, [1;33mGPU RAM: 0.03 G/79.14 G[0m][0m

In [None]:
from recbole.utils import load_data_and_model

In [None]:
# Unified model performance comparison and analysis
def compare_models(model_results):
    """Compare all trained models"""
    
    print("\n" + "="*70)
    print("                Model Performance Comparison Report")
    print("="*70)
    
    # Extract test results
    results_summary = {}
    for model_type, result in model_results.items():
        if result is not None:
            results_summary[model_type] = result['test_result']
        else:
            results_summary[model_type] = {}
    
    # Create comparison table
    print(f"\n{'Metric':<15}", end="")
    model_names = list(results_summary.keys())
    for name in model_names:
        print(f" | {name:<12}", end="")
    print()
    print("-" * (15 + 15 * len(model_names)))
    
    metrics_to_compare = ['recall@10', 'ndcg@10', 'recall@20', 'ndcg@20']
    best_scores = {}
    
    for metric in metrics_to_compare:
        print(f"{metric:<15}", end="")
        metric_values = []
        
        for model_type in model_names:
            value = results_summary[model_type].get(metric, 'N/A')
            if value != 'N/A':
                print(f" | {value:<12.4f}", end="")
                metric_values.append((model_type, value))
            else:
                print(f" | {'N/A':<12}", end="")
        
        print()
        
        # Find best model
        if metric_values:
            best_model, best_score = max(metric_values, key=lambda x: x[1])
            best_scores[metric] = (best_model, best_score)
    
    print("-" * (15 + 15 * len(model_names)))
    
    # Analyze best models
    print(f"\nüèÜ Best model for each metric:")
    for metric, (best_model, best_score) in best_scores.items():
        print(f"  {metric}: {best_model} ({best_score:.4f})")
    
    # Overall model ranking
    model_scores = {name: [] for name in model_names}
    for metric in metrics_to_compare:
        for model_type in model_names:
            value = results_summary[model_type].get(metric, 0)
            if value != 'N/A' and value != 0:
                model_scores[model_type].append(value)
    
    avg_scores = {name: sum(scores)/len(scores) if scores else 0 
                  for name, scores in model_scores.items()}
    
    print(f"\nüìä Overall model ranking:")
    sorted_models = sorted(avg_scores.items(), key=lambda x: x[1], reverse=True)
    for i, (model, score) in enumerate(sorted_models, 1):
        print(f"  {i}. {model}: {score:.4f} (avg score)")
    
    # Model characteristics analysis
    print(f"\nüìù Model characteristics:")
    model_analysis = {
        'Pop': 'Item popularity based, fast training, good for cold start',
        'BPR': 'Collaborative filtering, personalized, balanced performance',  
        'SASRec': 'Sequential recommendation, temporal patterns, rich historical data'
    }
    
    for model_type in model_names:
        if model_type in model_analysis:
            status = "‚úÖ Success" if model_results[model_type] else "‚ùå Failed"
            print(f"  ‚Ä¢ {model_type}: {model_analysis[model_type]} [{status}]")
    
    return best_scores, sorted_models

# Execute model comparison
if model_results:
    best_scores, model_ranking = compare_models(model_results)
else:
    print("‚ö†Ô∏è No model results to compare, please run model training first")

## Dataset Test

In [None]:
import json
import os
domain = 'Amazon_All_Beauty'

In [None]:
from datasets import load_dataset
# domain = 'All_Beauty'
# meta_data = json.load(open(f'seq_rec_results/dataset/processed/{domain}/{domain}.data_maps'))
datasets = load_dataset(
    "McAuley-Lab/Amazon-Reviews-2023",
    f"5core_timestamp_w_his_{domain}",
    trust_remote_code=True
)
raw_review_path = f'dataset/{domain}/{domain}.jsonl'
if not os.path.exists(raw_review_path):
    print(f'Downloading {domain} reviews from Hugging Face...')
    import wget
    wget.download(f'https://huggingface.co/datasets/McAuley-Lab/Amazon-Reviews-2023/resolve/main/raw/review_categories/{domain}.jsonl?download=true', raw_review_path)
# len(meta_data['item2id']), len(meta_data['user2id'])

In [None]:
from tqdm import tqdm
review_list = []
with open(raw_review_path, 'r') as f:
    for line in tqdm(f):
        review_list.append(json.loads(line))


In [None]:
user_set = set(meta_data['user2id'].keys())
item_set = set(meta_data['item2id'].keys())

In [None]:
review_list[0]

In [None]:
from collections import defaultdict

user2reviews = defaultdict(list)
for review in tqdm(review_list):
    if review['user_id'] in user_set and review['asin'] in item_set:
        user2reviews[review['user_id']].append({
            'asin': review['asin'],
            'rating': review['rating'],
            'title': review['title'],
            'text': review['text'],
            'item_id': meta_data['item2id'][review['asin']],
            'timestamp': review['timestamp'],
            'helpful_vote': review['helpful_vote'],
            'verified_purchase': review['verified_purchase'],
        })

    

In [None]:
# save user2reviews
with open(f'seq_rec_results/dataset/processed/{domain}/{domain}.reviews', 'w') as f:
    json.dump(user2reviews, f)

## General Test

In [None]:
import outlines
from transformers import AutoTokenizer, AutoModelForCausalLM


MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
model = outlines.from_transformers(
    AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto"),
    AutoTokenizer.from_pretrained(MODEL_NAME)
)

In [None]:
from pydantic import BaseModel
from enum import Enum

class Rating(Enum):
    poor = 1
    fair = 2
    good = 3
    excellent = 4

class ProductReview(BaseModel):
    rating: Rating
    pros: list[str]
    cons: list[str]
    summary: str

review = model(
    "Review: The XPS 13 has great battery life and a stunning display, but it runs hot and the webcam is poor quality.",
    ProductReview,
    max_new_tokens=200,
)

review = ProductReview.model_validate_json(review)
print(f"Rating: {review.rating.name}")  # "Rating: good"
print(f"Pros: {review.pros}")           # "Pros: ['great battery life', 'stunning display']"
print(f"Summary: {review.summary}")     # "Summary: Good laptop with great display but thermal issues"

## vLLM Test

In [None]:
from enum import Enum
from pydantic import BaseModel
from vllm import LLM, SamplingParams
from vllm.sampling_params import GuidedDecodingParams

# ÂÆö‰πâ JSON schema via Pydantic Ê®°Âûã
class Person(BaseModel):
    name: str
    age: int
    email: str

json_schema = Person.model_json_schema()

def main():
    llm = LLM(model="Qwen/Qwen2.5-3B-Instruct", max_model_len=100)

    # ‰ΩøÁî® regex Âº∫Âà∂ËæìÂá∫ÂΩ¢ÂºèÔºå‰æãÂ¶Ç email Ê†ºÂºè
    guided_regex = r'"\s*email"\s*:\s*"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}"'
    guided_decoding_params_regex = GuidedDecodingParams(regex=guided_regex)
    sampling_params_regex = SamplingParams(
        guided_decoding=guided_decoding_params_regex,
        max_tokens=50
    )

    prompt_regex = (
        "Generate a JSON object with fields name, age, and email about a scientist:\n"
        "{\n"
        '  "name": "Ada Lovelace",\n'
        '  "age": 36,\n'
        '  "email": "ada.lovelace@example.com"\n'
        "}"
    )

    out_regex = llm.generate(prompts=prompt_regex, sampling_params=sampling_params_regex)
    print("Regex-constrained output:")
    print(out_regex[0].outputs[0].text)

    # ‰ΩøÁî® JSON schema Âº∫Âà∂ËæìÂá∫Êï¥‰∏™ÁªìÊûÑÁ¨¶Âêà Person Ê®°Âûã
    guided_decoding_params_json = GuidedDecodingParams(json=json_schema)
    sampling_params_json = SamplingParams(
        guided_decoding=guided_decoding_params_json,
        max_tokens=100
    )

    prompt_json = (
        "Generate a JSON object about a historical scientist with name, age (integer), and email."
    )

    out_json = llm.generate(prompts=prompt_json, sampling_params=sampling_params_json)
    print("JSON-schema-constrained output:")
    print(out_json[0].outputs[0].text)


if __name__ == "__main__":
    main()

## Draw Heatmap

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

recallers = ['bpr', 'sasrec', 'fpmc', 'pop', 'itemknn']
similarity_dict = {
    "ml-1m": {
        "jaccard": {
            "bpr_vs_sasrec": 0.34273530362884574,
            "bpr_vs_fpmc": 0.46105757444633155,
            "bpr_vs_pop": 0.444434263661674,
            "bpr_vs_itemknn": 0.6134421115995813,
            "sasrec_vs_fpmc": 0.38874815022721565,
            "sasrec_vs_pop": 0.2539883657580121,
            "sasrec_vs_itemknn": 0.338885181548484,
            "fpmc_vs_pop": 0.31859217811453044,
            "fpmc_vs_itemknn": 0.40275755777363903,
            "pop_vs_itemknn": 0.47005992461972157,
            "average": 0.40347006113780354
        },
        "rbo": {
            "bpr_vs_sasrec": 0.08258759583764952,
            "bpr_vs_fpmc": 0.13135964670604944,
            "bpr_vs_pop": 0.2577950441433609,
            "bpr_vs_itemknn": 0.34590342620220366,
            "sasrec_vs_fpmc": 0.19305723421020202,
            "sasrec_vs_pop": 0.03979166691424811,
            "sasrec_vs_itemknn": 0.0992836502099173,
            "fpmc_vs_pop": 0.08083980497851037,
            "fpmc_vs_itemknn": 0.08839267603482477,
            "pop_vs_itemknn": 0.1641174113709889,
            "average": 0.1483128156607955
        }
    },
    "steam": {
        "jaccard": {
            "bpr_vs_itemknn": 0.6308220748941968,
            "bpr_vs_fpmc": 0.5746376430231429,
            "bpr_vs_pop": 0.854984443673129,
            "bpr_vs_sasrec": 0.5508810983313444,
            "itemknn_vs_fpmc": 0.5039287084961207,
            "itemknn_vs_pop": 0.6003215868790582,
            "itemknn_vs_sasrec": 0.4712966038266744,
            "fpmc_vs_pop": 0.5317308405072102,
            "fpmc_vs_sasrec": 0.4909579379310822,
            "pop_vs_sasrec": 0.5320244418094473,
            "average": 0.5741585379371407
        },
        "rbo": {
            "bpr_vs_itemknn": 0.5976655030859763,
            "bpr_vs_fpmc": 0.5909183916013645,
            "bpr_vs_pop": 0.3731503703535082,
            "bpr_vs_sasrec": 0.42053214798331184,
            "itemknn_vs_fpmc": 0.48833146395151233,
            "itemknn_vs_pop": 0.2561764282236138,
            "itemknn_vs_sasrec": 0.3593697550375676,
            "fpmc_vs_pop": 0.2526088409611869,
            "fpmc_vs_sasrec": 0.4397719959901045,
            "pop_vs_sasrec": 0.21928076296399454,
            "average": 0.39978056601521406
        }
    },
    "music": {
        "jaccard": {
            "bpr_vs_itemknn": 0.05931017970127901,
            "bpr_vs_fpmc": 0.19183792768378685,
            "bpr_vs_pop": 0.3486857576508974,
            "bpr_vs_sasrec": 0.2763329019130139,
            "itemknn_vs_fpmc": 0.04500210245874656,
            "itemknn_vs_pop": 0.05856352597831993,
            "itemknn_vs_sasrec": 0.05318460250694704,
            "fpmc_vs_pop": 0.11867892641235088,
            "fpmc_vs_sasrec": 0.17417130349015045,
            "pop_vs_sasrec": 0.18694072081894209,
            "average": 0.15127079486144343
        },
        "rbo": {
            "bpr_vs_itemknn": 0.04553230390159534,
            "bpr_vs_fpmc": 0.19411234240110908,
            "bpr_vs_pop": 0.29511417000419926,
            "bpr_vs_sasrec": 0.2600795002482322,
            "itemknn_vs_fpmc": 0.03326785313655516,
            "itemknn_vs_pop": 0.022283683544130312,
            "itemknn_vs_sasrec": 0.03514783197819881,
            "fpmc_vs_pop": 0.13908358114051256,
            "fpmc_vs_sasrec": 0.1597916601956278,
            "pop_vs_sasrec": 0.1450037198043686,
            "average": 0.1329416646354529
        }
    }
}


for dataset_name in similarity_dict.keys():
    print(f'========== {dataset_name} ==========' )
    for similarity_type in similarity_dict[dataset_name].keys():

        data = np.zeros((len(recallers), len(recallers)))
        recaller2idx = {recaller: i for i, recaller in enumerate(recallers)}
        idx2recaller = {i: recaller for i, recaller in enumerate(recallers)}
        for recaller1 in recallers:
            for recaller2 in recallers:
                if recaller1 == recaller2:
                    data[recaller2idx[recaller1], recaller2idx[recaller2]] = 1.0
                else:
                    if f"{recaller1}_vs_{recaller2}" not in similarity_dict[dataset_name][similarity_type]:
                        data[recaller2idx[recaller1], recaller2idx[recaller2]] = similarity_dict[dataset_name][similarity_type][f"{recaller2}_vs_{recaller1}"]
                    else:
                        data[recaller2idx[recaller1], recaller2idx[recaller2]] = similarity_dict[dataset_name][similarity_type][f"{recaller1}_vs_{recaller2}"]

        # ËÆæÁΩÆÊ†áÁ≠æÔºàÊ®™Á∫µÂùêÊ†á 1-9Ôºâ
        labels = [idx2recaller[i] for i in range(len(recallers))]

        plt.figure(figsize=(8, 6))
        sns.heatmap(
            data,
            xticklabels=labels,
            yticklabels=labels,
            cmap="Blues",
            annot=True,
            fmt=".2f",
            cbar=True,
            square=True
        )
        print(similarity_type)
        print(idx2recaller)
        plt.show()

## General Test

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.trainer_utils import get_last_checkpoint

output_dir = "GRPO/grpo_models"
last_checkpoint = get_last_checkpoint(output_dir)

tokenizer = AutoTokenizer.from_pretrained(last_checkpoint)
model = AutoModelForCausalLM.from_pretrained(last_checkpoint, trust_remote_code=True, device_map="auto")

In [None]:
import outlines
model = outlines.from_transformers(model, tokenizer)


In [None]:
from typing import Literal
from pydantic import BaseModel


# Simple classification
sentiment = model(
    "Analyze: 'This product completely changed my life!'",
    Literal["Positive", "Negative", "Neutral"]
)
print(sentiment)  # "Positive"

# Extract specific types
temperature = model("What's the boiling point of water in Celsius?", int)
print(temperature)  # 100

In [None]:
from pydantic import BaseModel
from enum import Enum

class Rating(Enum):
    poor = 1
    fair = 2
    good = 3
    excellent = 4

class ProductReview(BaseModel):
    rating: Rating
    pros: list[str]
    cons: list[str]
    summary: str

review = model(
    ["Review: The XPS 13 has great battery life and a stunning display, but it runs hot and the webcam is poor quality.", 
    "Review: The XPS 13 has great battery life and a stunning display, but it runs hot and the webcam is poor quality.",],
    ProductReview,
    max_new_tokens=200,
)

In [None]:
review

=== Loading steam ===
Creating 5-core filtered dataset...
/home/zzheng3/miniconda3/envs/rs/lib/python3.12/site-packages/recbole/data/dataset/dataset.py:648: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
/home/zzheng3/miniconda3/envs/rs/lib/python3.12/site-packages/recbole/data/dataset/dataset.py:650: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)

:bar_chart: 5-core Filtered Dataset Statistics:
[1;35msteam[0m
[1;34mThe number of users[0m: 25390
[1;34mAverage actions of users[0m: 11.929930284768995
[1;34mThe number of items[0m: 4090
[1;34mAverage actions of items[0m: 74.07410124724872
[1;34mThe number of inters[0m: 302889
[1;34mThe sparsity of the dataset[0m: 99.70832615116169%
[1;34mRemain Fields[0m: ['user_id', 'product_id', 'timestamp', 'product_id_list', 'timestamp_list', 'item_length']
[1;35msteam[0m
[1;34mThe number of users[0m: 25390
[1;34mAverage actions of users[0m: 9.929930284768995
[1;34mThe number of items[0m: 4090
[1;34mAverage actions of items[0m: 61.701174743024964
[1;34mThe number of inters[0m: 252111
[1;34mThe sparsity of the dataset[0m: 99.75722398071744%
[1;34mRemain Fields[0m: ['user_id', 'product_id', 'timestamp', 'product_id_list', 'timestamp_list', 'item_length']
(25389,)
[1;35msteam[0m
[1;34mThe number of users[0m: 25390
[1;34mAverage actions of users[0m: 1.0
[1;34mThe number of items[0m: 4090
[1;34mAverage actions of items[0m: 9.252551020408163
[1;34mThe number of inters[0m: 25389
[1;34mThe sparsity of the dataset[0m: 99.97555108522212%
[1;34mRemain Fields[0m: ['user_id', 'product_id', 'timestamp', 'product_id_list', 'timestamp_list', 'item_length']
(25389,)
[1;35msteam[0m
[1;34mThe number of users[0m: 25390
[1;34mAverage actions of users[0m: 1.0
[1;34mThe number of items[0m: 4090
[1;34mAverage actions of items[0m: 10.027251184834123
[1;34mThe number of inters[0m: 25389
[1;34mThe sparsity of the dataset[0m: 99.97555108522212%
[1;34mRemain Fields[0m: ['user_id', 'product_id', 'timestamp', 'product_id_list', 'timestamp_list', 'item_length']
(25389,)
=== Loading ml-10m ===
Creating 5-core filtered dataset...
/home/zzheng3/miniconda3/envs/rs/lib/python3.12/site-packages/recbole/data/dataset/dataset.py:648: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
/home/zzheng3/miniconda3/envs/rs/lib/python3.12/site-packages/recbole/data/dataset/dataset.py:650: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)

:bar_chart: 5-core Filtered Dataset Statistics:
[1;35mml-10m[0m
[1;34mThe number of users[0m: 69815
[1;34mAverage actions of users[0m: 117.03065287764632
[1;34mThe number of items[0m: 9889
[1;34mAverage actions of items[0m: 826.2922734627832
[1;34mThe number of inters[0m: 8170378
[1;34mThe sparsity of the dataset[0m: 98.81657420789803%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
[1;35mml-10m[0m
[1;34mThe number of users[0m: 69815
[1;34mAverage actions of users[0m: 115.03065287764632
[1;34mThe number of items[0m: 9889
[1;34mAverage actions of items[0m: 812.1713187702265
[1;34mThe number of inters[0m: 8030750
[1;34mThe sparsity of the dataset[0m: 98.83679841007076%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(69814,)
[1;35mml-10m[0m
[1;34mThe number of users[0m: 69815
[1;34mAverage actions of users[0m: 1.0
[1;34mThe number of items[0m: 9889
[1;34mAverage actions of items[0m: 11.552871090517955
[1;34mThe number of inters[0m: 69814
[1;34mThe sparsity of the dataset[0m: 99.98988789891364%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(69814,)
[1;35mml-10m[0m
[1;34mThe number of users[0m: 69815
[1;34mAverage actions of users[0m: 1.0
[1;34mThe number of items[0m: 9889
[1;34mAverage actions of items[0m: 11.064025356576861
[1;34mThe number of inters[0m: 69814
[1;34mThe sparsity of the dataset[0m: 99.98988789891364%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(69814,)
=== Loading Amazon_Toys_and_Games ===
Creating 5-core filtered dataset...
/home/zzheng3/miniconda3/envs/rs/lib/python3.12/site-packages/recbole/data/dataset/dataset.py:648: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
/home/zzheng3/miniconda3/envs/rs/lib/python3.12/site-packages/recbole/data/dataset/dataset.py:650: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)

:bar_chart: 5-core Filtered Dataset Statistics:
[1;35mAmazon_Toys_and_Games[0m
[1;34mThe number of users[0m: 360660
[1;34mAverage actions of users[0m: 7.992821474023939
[1;34mThe number of items[0m: 143927
[1;34mAverage actions of items[0m: 20.03031629561689
[1;34mThe number of inters[0m: 2882683
[1;34mThe sparsity of the dataset[0m: 99.99444662967173%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
[1;35mAmazon_Toys_and_Games[0m
[1;34mThe number of users[0m: 360660
[1;34mAverage actions of users[0m: 5.992821474023939
[1;34mThe number of items[0m: 143927
[1;34mAverage actions of items[0m: 15.07564449528486
[1;34mThe number of inters[0m: 2161365
[1;34mThe sparsity of the dataset[0m: 99.99583621915433%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(360659,)
[1;35mAmazon_Toys_and_Games[0m
[1;34mThe number of users[0m: 360660
[1;34mAverage actions of users[0m: 1.0
[1;34mThe number of items[0m: 143927
[1;34mAverage actions of items[0m: 3.597631896577522
[1;34mThe number of inters[0m: 360659
[1;34mThe sparsity of the dataset[0m: 99.99930520525871%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(360659,)
[1;35mAmazon_Toys_and_Games[0m
[1;34mThe number of users[0m: 360660
[1;34mAverage actions of users[0m: 1.0
[1;34mThe number of items[0m: 143927
[1;34mAverage actions of items[0m: 3.7043477367734514
[1;34mThe number of inters[0m: 360659
[1;34mThe sparsity of the dataset[0m: 99.99930520525871%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(360659,)
=== Loading ml-1m ===
Creating 5-core filtered dataset...
/home/zzheng3/miniconda3/envs/rs/lib/python3.12/site-packages/recbole/data/dataset/dataset.py:648: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
/home/zzheng3/miniconda3/envs/rs/lib/python3.12/site-packages/recbole/data/dataset/dataset.py:650: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)

:bar_chart: 5-core Filtered Dataset Statistics:
[1;35mml-1m[0m
[1;34mThe number of users[0m: 6039
[1;34mAverage actions of users[0m: 137.42149718449818
[1;34mThe number of items[0m: 3308
[1;34mAverage actions of items[0m: 250.9074690051406
[1;34mThe number of inters[0m: 829751
[1;34mThe sparsity of the dataset[0m: 95.84647093369118%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
[1;35mml-1m[0m
[1;34mThe number of users[0m: 6039
[1;34mAverage actions of users[0m: 135.42149718449818
[1;34mThe number of items[0m: 3308
[1;34mAverage actions of items[0m: 247.25582098578772
[1;34mThe number of inters[0m: 817675
[1;34mThe sparsity of the dataset[0m: 95.90692041432422%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(6038,)
[1;35mml-1m[0m
[1;34mThe number of users[0m: 6039
[1;34mAverage actions of users[0m: 1.0
[1;34mThe number of items[0m: 3308
[1;34mAverage actions of items[0m: 3.244492208490059
[1;34mThe number of inters[0m: 6038
[1;34mThe sparsity of the dataset[0m: 99.96977525968347%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(6038,)
[1;35mml-1m[0m
[1;34mThe number of users[0m: 6039
[1;34mAverage actions of users[0m: 1.0
[1;34mThe number of items[0m: 3308
[1;34mAverage actions of items[0m: 3.3940415964024733
[1;34mThe number of inters[0m: 6038
[1;34mThe sparsity of the dataset[0m: 99.96977525968347%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(6038,)
=== Loading yelp2022 ===
Creating 5-core filtered dataset...
/home/zzheng3/miniconda3/envs/rs/lib/python3.12/site-packages/recbole/data/dataset/dataset.py:648: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
/home/zzheng3/miniconda3/envs/rs/lib/python3.12/site-packages/recbole/data/dataset/dataset.py:650: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)

:bar_chart: 5-core Filtered Dataset Statistics:
[1;35myelp2022[0m
[1;34mThe number of users[0m: 207649
[1;34mAverage actions of users[0m: 14.340557096625059
[1;34mThe number of items[0m: 89204
[1;34mAverage actions of items[0m: 33.38252505549203
[1;34mThe number of inters[0m: 2977788
[1;34mThe sparsity of the dataset[0m: 99.98392394059113%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
[1;35myelp2022[0m
[1;34mThe number of users[0m: 207649
[1;34mAverage actions of users[0m: 12.340557096625059
[1;34mThe number of items[0m: 89204
[1;34mAverage actions of items[0m: 28.747470214723236
[1;34mThe number of inters[0m: 2562492
[1;34mThe sparsity of the dataset[0m: 99.98616598171972%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(207648,)
[1;35myelp2022[0m
[1;34mThe number of users[0m: 207649
[1;34mAverage actions of users[0m: 1.0
[1;34mThe number of items[0m: 89204
[1;34mAverage actions of items[0m: 3.85118142364331
[1;34mThe number of inters[0m: 207648
[1;34mThe sparsity of the dataset[0m: 99.9988789794357%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(207648,)
[1;35myelp2022[0m
[1;34mThe number of users[0m: 207649
[1;34mAverage actions of users[0m: 1.0
[1;34mThe number of items[0m: 89204
[1;34mAverage actions of items[0m: 3.849539311469939
[1;34mThe number of inters[0m: 207648
[1;34mThe sparsity of the dataset[0m: 99.9988789794357%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(207648,)
=== Loading Amazon_Books ===
Creating 5-core filtered dataset...
/home/zzheng3/miniconda3/envs/rs/lib/python3.12/site-packages/recbole/data/dataset/dataset.py:648: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
/home/zzheng3/miniconda3/envs/rs/lib/python3.12/site-packages/recbole/data/dataset/dataset.py:650: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)

:bar_chart: 5-core Filtered Dataset Statistics:
[1;35mAmazon_Books[0m
[1;34mThe number of users[0m: 689537
[1;34mAverage actions of users[0m: 11.156473338592908
[1;34mThe number of items[0m: 449585
[1;34mAverage actions of items[0m: 17.11140175856539
[1;34mThe number of inters[0m: 7692790
[1;34mThe sparsity of the dataset[0m: 99.99751849880246%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
[1;35mAmazon_Books[0m
[1;34mThe number of users[0m: 689537
[1;34mAverage actions of users[0m: 9.156473338592908
[1;34mThe number of items[0m: 449585
[1;34mAverage actions of items[0m: 14.076404805008337
[1;34mThe number of inters[0m: 6313718
[1;34mThe sparsity of the dataset[0m: 99.99796335285664%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(689536,)
[1;35mAmazon_Books[0m
[1;34mThe number of users[0m: 689537
[1;34mAverage actions of users[0m: 1.0
[1;34mThe number of items[0m: 449585
[1;34mAverage actions of items[0m: 2.8250062478746982
[1;34mThe number of inters[0m: 689536
[1;34mThe sparsity of the dataset[0m: 99.9997775729729%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(689536,)
[1;35mAmazon_Books[0m
[1;34mThe number of users[0m: 689537
[1;34mAverage actions of users[0m: 1.0~
[1;34mThe number of items[0m: 449585
[1;34mAverage actions of items[0m: 2.968235724586212
[1;34mThe number of inters[0m: 689536
[1;34mThe sparsity of the dataset[0m: 99.9997775729729%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(689536,)
=== Loading book-crossing ===
Creating 5-core filtered dataset...
/home/zzheng3/miniconda3/envs/rs/lib/python3.12/site-packages/recbole/data/dataset/dataset.py:648: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
/home/zzheng3/miniconda3/envs/rs/lib/python3.12/site-packages/recbole/data/dataset/dataset.py:650: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)
Error loading book-crossing: [timestamp] is not exist in interaction [The batch_size of interaction: 116923
    user_id, torch.Size([116923]), cpu, torch.int64
    item_id, torch.Size([116923]), cpu, torch.int64
    rating, torch.Size([116923]), cpu, torch.float32

].
=== Loading Amazon_All_Beauty ===
Creating 5-core filtered dataset...
/home/zzheng3/miniconda3/envs/rs/lib/python3.12/site-packages/recbole/data/dataset/dataset.py:648: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
/home/zzheng3/miniconda3/envs/rs/lib/python3.12/site-packages/recbole/data/dataset/dataset.py:650: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)

:bar_chart: 5-core Filtered Dataset Statistics:
[1;35mAmazon_All_Beauty[0m
[1;34mThe number of users[0m: 199
[1;34mAverage actions of users[0m: 8.535353535353535
[1;34mThe number of items[0m: 281
[1;34mAverage actions of items[0m: 6.101083032490974
[1;34mThe number of inters[0m: 1690
[1;34mThe sparsity of the dataset[0m: 96.97777141937445%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
[1;35mAmazon_All_Beauty[0m
[1;34mThe number of users[0m: 199
[1;34mAverage actions of users[0m: 6.5353535353535355
[1;34mThe number of items[0m: 281
[1;34mAverage actions of items[0m: 4.864661654135339
[1;34mThe number of inters[0m: 1294
[1;34mThe sparsity of the dataset[0m: 97.68593858974587%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(198,)
[1;35mAmazon_All_Beauty[0m
[1;34mThe number of users[0m: 199
[1;34mAverage actions of users[0m: 1.0
[1;34mThe number of items[0m: 281
[1;34mAverage actions of items[0m: 1.5114503816793894
[1;34mThe number of inters[0m: 198
[1;34mThe sparsity of the dataset[0m: 99.64591641481428%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(198,)
[1;35mAmazon_All_Beauty[0m
[1;34mThe number of users[0m: 199
[1;34mAverage actions of users[0m: 1.0
[1;34mThe number of items[0m: 281
[1;34mAverage actions of items[0m: 1.81651376146789
[1;34mThe number of inters[0m: 198
[1;34mThe sparsity of the dataset[0m: 99.64591641481428%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(198,)
=== Loading Amazon_Musical_Instruments ===
Creating 5-core filtered dataset...
/home/zzheng3/miniconda3/envs/rs/lib/python3.12/site-packages/recbole/data/dataset/dataset.py:648: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
/home/zzheng3/miniconda3/envs/rs/lib/python3.12/site-packages/recbole/data/dataset/dataset.py:650: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)

:bar_chart: 5-core Filtered Dataset Statistics:
[1;35mAmazon_Musical_Instruments[0m
[1;34mThe number of users[0m: 48454
[1;34mAverage actions of users[0m: 7.8265742059315215
[1;34mThe number of items[0m: 21414
[1;34mAverage actions of items[0m: 17.713158017656127
[1;34mThe number of inters[0m: 379221
[1;34mThe sparsity of the dataset[0m: 99.96345188811027%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
[1;35mAmazon_Musical_Instruments[0m
[1;34mThe number of users[0m: 48454
[1;34mAverage actions of users[0m: 5.8265742059315215
[1;34mThe number of items[0m: 21414
[1;34mAverage actions of items[0m: 13.218850962213795
[1;34mThe number of inters[0m: 282315
[1;34mThe sparsity of the dataset[0m: 99.97279137967531%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(48453,)
[1;35mAmazon_Musical_Instruments[0m
[1;34mThe number of users[0m: 48454
[1;34mAverage actions of users[0m: 1.0
[1;34mThe number of items[0m: 21414
[1;34mAverage actions of items[0m: 3.3051159618008183
[1;34mThe number of inters[0m: 48453
[1;34mThe sparsity of the dataset[0m: 99.99533025421748%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(48453,)
[1;35mAmazon_Musical_Instruments[0m
[1;34mThe number of users[0m: 48454
[1;34mAverage actions of users[0m: 1.0
[1;34mThe number of items[0m: 21414
[1;34mAverage actions of items[0m: 3.3866638708324595
[1;34mThe number of inters[0m: 48453
[1;34mThe sparsity of the dataset[0m: 99.99533025421748%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(48453,)
=== Loading anime ===
Creating 5-core filtered dataset...
/home/zzheng3/miniconda3/envs/rs/lib/python3.12/site-packages/recbole/data/dataset/dataset.py:648: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
/home/zzheng3/miniconda3/envs/rs/lib/python3.12/site-packages/recbole/data/dataset/dataset.py:650: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)
Error loading anime: [timestamp] is not exist in interaction [The batch_size of interaction: 7793926
    user_id, torch.Size([7793926]), cpu, torch.int64
    item_id, torch.Size([7793926]), cpu, torch.int64
    rating, torch.Size([7793926]), cpu, torch.float32

].
=== Loading Food ===
Creating 5-core filtered dataset...
/home/zzheng3/miniconda3/envs/rs/lib/python3.12/site-packages/recbole/data/dataset/dataset.py:648: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
/home/zzheng3/miniconda3/envs/rs/lib/python3.12/site-packages/recbole/data/dataset/dataset.py:650: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)

:bar_chart: 5-core Filtered Dataset Statistics:
[1;35mFood[0m
[1;34mThe number of users[0m: 16645
[1;34mAverage actions of users[0m: 30.48594087959625
[1;34mThe number of items[0m: 39448
[1;34mAverage actions of items[0m: 12.863031409232642
[1;34mThe number of inters[0m: 507408
[1;34mThe sparsity of the dataset[0m: 99.92272330829917%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
[1;35mFood[0m
[1;34mThe number of users[0m: 16645
[1;34mAverage actions of users[0m: 28.48594087959625
[1;34mThe number of items[0m: 39448
[1;34mAverage actions of items[0m: 12.019469654717842
[1;34mThe number of inters[0m: 474120
[1;34mThe sparsity of the dataset[0m: 99.9277929692295%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(16644,)
[1;35mFood[0m
[1;34mThe number of users[0m: 16645
[1;34mAverage actions of users[0m: 1.0
[1;34mThe number of items[0m: 39448
[1;34mAverage actions of items[0m: 1.7695088241547947
[1;34mThe number of inters[0m: 16644
[1;34mThe sparsity of the dataset[0m: 99.99746516953483%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(16644,)
[1;35mFood[0m
[1;34mThe number of users[0m: 16645
[1;34mAverage actions of users[0m: 1.0
[1;34mThe number of items[0m: 39448
[1;34mAverage actions of items[0m: 1.801103776647549
[1;34mThe number of inters[0m: 16644
[1;34mThe sparsity of the dataset[0m: 99.99746516953483%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'rating', 'timestamp', 'item_id_list', 'rating_list', 'timestamp_list', 'item_length']
(16644,)

In [None]:
# train_grpo.py
from datasets import load_dataset
from trl import GRPOConfig, GRPOTrainer

dataset = load_dataset("trl-lib/ultrafeedback-prompt", split="train")

# Dummy reward function for demonstration purposes
def reward_num_unique_letters(completions, **kwargs):
    """Reward function that rewards completions with more unique letters."""
    completion_contents = [completion[0]["content"] for completion in completions]
    return [float(len(set(content))) for content in completion_contents]

In [None]:
training_args = GRPOConfig(output_dir="Qwen2-0.5B-GRPO")
trainer = GRPOTrainer(
    model="Qwen/Qwen2-0.5B-Instruct",
    reward_funcs=reward_num_unique_letters,
    args=training_args,
    train_dataset=dataset,
)
trainer.train()

In [None]:
#!/usr/bin/env python3
"""
Remove u"" wrappers from steam.item file
"""

import re

def remove_u_quotes(input_file, output_file):
    with open(input_file, 'r', encoding='utf-8') as f:
        content = f.read()
    
    # Remove u" at the beginning and " at the end
    # Pattern explanation:
    # \bu" - matches u" at word boundary (not part of another word)
    # ([^"]*) - captures everything inside the quotes
    # " - matches the closing quote
    fixed_content = re.sub(r'\bu"([^"]*)"', r'\1', content)
    
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(fixed_content)
    
    print(f"Fixed file written to: {output_file}")
    
    # Count fixes
    original_count = len(re.findall(r'\bu"[^"]*"', content))
    print(f"Removed {original_count} instances of u\"...\" wrappers")

if __name__ == "__main__":
    import shutil
    
    input_file = "/home/zzheng3/AmazonReviews2023/dataset/steam/steam.item"
    output_file = "/home/zzheng3/AmazonReviews2023/dataset/steam/steam.item.fixed"
    
    # Process the file
    remove_u_quotes(input_file, output_file)
    
    # Backup and replace
    backup_file = input_file + ".backup2"
    shutil.copy(input_file, backup_file)
    print(f"Created backup at: {backup_file}")
    
    shutil.move(output_file, input_file)
    print("Replaced original file")

In [None]:
import pandas as pd

tmp = pd.read_csv("/home/zzheng3/AmazonReviews2023/dataset/steam/steam.item", sep="\t")

In [None]:
tmp.columns = tmp.columns.str.split(':').str[0]

In [None]:
# check nan in tmp['product_id']
tmp[tmp['product_id'].isna()]


In [None]:
import pickle as pkl
import json

with open("completions_ml-1m.pkl", "rb") as f:
    completions = pkl.load(f)


In [None]:
completions

In [None]:
from trl import SFTTrainer
from datasets import load_dataset

train_dataset=load_dataset("trl-lib/Capybara", split="train"),

In [None]:
train_dataset

In [None]:
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import SFTTrainer, SFTConfig

# Ëá™Âª∫Êï∞ÊçÆ
samples = [
    {"text": "User: ËØ∑Ëß£Èáä‰∏Ä‰∏ãÈáèÂ≠êËÆ°ÁÆó„ÄÇ\nAssistant: ÈáèÂ≠êËÆ°ÁÆóÊòØÂà©Áî®ÈáèÂ≠êÂè†Âä†ÂíåÁ∫†Áº†Á≠âÂéüÁêÜËøõË°å‰ø°ÊÅØÂ§ÑÁêÜÁöÑËÆ°ÁÆóÊñπÂºè„ÄÇ"},
    {"text": "User: ÁªôÊàë‰∏â‰∏™ Python ÊèêÈ´òÊïàÁéáÁöÑÂ∞èÊäÄÂ∑ß„ÄÇ\nAssistant: 1) ‰ΩøÁî®ÂàóË°®Êé®ÂØºÂºèÔºõ2) ‰ΩøÁî®ÁîüÊàêÂô®Ôºõ3) ÂñÑÁî®Ê†áÂáÜÂ∫ì„ÄÇ"},
]
dataset = Dataset.from_list(samples)

# Ê®°Âûã
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.eos_token_id

# ‚úÖ ÊóßÁâàÊú¨ÂÖºÂÆπÂÜôÊ≥ïÔºöÂè™ÊîæÊ†áÂáÜ TrainingArguments
config = SFTConfig(
    output_dir="./sft_text_output",
    per_device_train_batch_size=2,
    num_train_epochs=1,
    learning_rate=2e-5,
    logging_steps=10,
)

# ‚úÖ Ëøô‰∫õÂèÇÊï∞ÊîπÂú® SFTTrainer Èáå‰º†
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=1024,  # Âú®ËøôÈáå‰º†
    packing=False,        # Âú®ËøôÈáå‰º†
    args=config,
)

trainer.train()
trainer.model.save_pretrained("./sft_text_output/final")
tokenizer.save_pretrained("./sft_text_output/final")

In [18]:
import pickle as pkl

rl_completions = pkl.load(open("completions/completions_ml-1m_use_hf_local_do_test_do_test_rl_use_vllm.pkl", "rb"))
sft_completions = pkl.load(open("completions/completions_ml-1m_use_hf_local_do_test_do_test_sft_use_vllm.pkl", "rb"))
ori_sft_completions = pkl.load(open("completions/completions_ml-1m_use_hf_local_do_test_use_vllm_do_sft.pkl", "rb"))
raw_completions = pkl.load(open("completions/completions_ml-1m_use_hf_local_do_test_use_vllm.pkl", "rb"))

In [31]:
user = 999
import json

print('zero-shot:', json.dumps(json.loads(raw_completions[user]), indent=4))
# print('sft (discrete)')
print('sft (discrete):', json.dumps(json.loads(ori_sft_completions[user]), indent=4))
print('sft (soft):', json.dumps(json.loads(sft_completions[user]), indent=4))

zero-shot: {
    "bpr": {
        "top-k": 50,
        "score-weight": 0.5
    },
    "itemknn": {
        "top-k": 50,
        "score-weight": 0.5
    },
    "fpmc": {
        "top-k": 50,
        "score-weight": 0.5
    },
    "pop": {
        "top-k": 50,
        "score-weight": 0.5
    },
    "sasrec": {
        "top-k": 50,
        "score-weight": 0.5
    }
}
sft (discrete): {
    "bpr": {
        "top-k": 0,
        "score-weight": 0.0
    },
    "itemknn": {
        "top-k": 0,
        "score-weight": 0.0
    },
    "fpmc": {
        "top-k": 0,
        "score-weight": 0.0
    },
    "pop": {
        "top-k": 0,
        "score-weight": 0.0
    },
    "sasrec": {
        "top-k": 0,
        "score-weight": 0.0
    }
}
sft (soft): {
    "bpr": {
        "top-k": 138,
        "score-weight": 0.18437745215208146
    },
    "itemknn": {
        "top-k": 114,
        "score-weight": 0.15215849375865598
    },
    "fpmc": {
        "top-k": 67,
        "score-weight": 0.089540629848484

In [7]:
sth_completions = pkl.load(open("completions/completions_ml-1m.pkl", "rb"))

In [8]:
sth_completions[user]

'{"bpr": {"top-k": 653, "score-weight": 0.0}, "itemknn": {"top-k": 982, "score-weight": 0.0}, "fpmc": {"top-k": 982, "score-weight": 0.982}, "pop": {"top-k": 982, "score-weight": -1.0}, "sasrec": {"top-k": 653, "score-weight": 0.9821} }'

In [12]:
from datasets import Dataset
data = Dataset.load_from_disk("GRPO/grpo_models/ml-1m/Llama-3.2-1B-Instruct_sft_data")

  from .autonotebook import tqdm as notebook_tqdm


In [32]:
print('ground truth:', json.dumps(json.loads(data[999]['completion']), indent=4))

ground truth: {
    "bpr": {
        "top-k": 150,
        "score-weight": 0.20128789985964096
    },
    "sasrec": {
        "top-k": 145,
        "score-weight": 0.19354841692367755
    },
    "fpmc": {
        "top-k": 173,
        "score-weight": 0.23116514639709734
    },
    "pop": {
        "top-k": 141,
        "score-weight": 0.18910396995780204
    },
    "itemknn": {
        "top-k": 138,
        "score-weight": 0.18489456686178224
    }
}


In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "meta-llama/Llama-3.1-8B-Instruct"   # ÊàñÁî®‰Ω†Êú¨Âú∞/ÁßÅÊúâÊùÉÈáç
tok = AutoTokenizer.from_pretrained(model_id, use_fast=True)

# 1) Ê≥®ÂÜå‰∏§‰∏™Ëá™ÂÆö‰πâÁâπÊÆä token
new_tokens = {"additional_special_tokens": ["[num]", "[soft]"]}
num_added = tok.add_special_tokens(new_tokens)
print("Êñ∞Â¢ûtoken‰∏™Êï∞:", num_added)
print("ÁâπÊÆätokenË°®:", tok.special_tokens_map)

# 2) ËΩΩÂÖ•Ê®°ÂûãÂπ∂ÂêåÊ≠•ËØçË°®Â§ßÂ∞èÔºàÂÖ≥ÈîÆÊ≠•È™§Ôºâ
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")
if num_added > 0:
    model.resize_token_embeddings(len(tok))

# 3) È™åËØÅÔºöËøô‰∫õËØçÊòØÂê¶Ë¢´ÂΩì‰Ωú‰∏Ä‰∏™Êï¥‰ΩìÂàáÂàÜ
sents = [
    "‰ª∑Ê†ºÊòØ[num]Ôºå‰ΩÜË¥®Âú∞Âæà[soft]„ÄÇ",
    "ËØ∑Êää[num]ÊõøÊç¢ÊàêÂÆûÈôÖÊï∞Â≠óÔºåÊää[soft]ÊõøÊç¢ÊàêÊüîËΩØÂ∫¶„ÄÇ",
]
for s in sents:
    print("\nÂè•Â≠ê:", s)
    print("tokenize ->", tok.tokenize(s))
    enc = tok(s, return_tensors="pt", add_special_tokens=False)
    print("ids ->", enc["input_ids"].tolist()[0])
    # ‰πüÂèØ‰ª•ÂèçËß£ÁúãÂõûÊîæ
    print("decode ->", tok.decode(enc["input_ids"][0], skip_special_tokens=False))

  from .autonotebook import tqdm as notebook_tqdm


Êñ∞Â¢ûtoken‰∏™Êï∞: 2
ÁâπÊÆätokenË°®: {'bos_token': '<|begin_of_text|>', 'eos_token': '<|eot_id|>', 'additional_special_tokens': ['[num]', '[soft]']}


`torch_dtype` is deprecated! Use `dtype` instead!
Loading checkpoint shards: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 4/4 [00:05<00:00,  1.30s/it]
The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
The new lm_head weights will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`



Âè•Â≠ê: ‰ª∑Ê†ºÊòØ[num]Ôºå‰ΩÜË¥®Âú∞Âæà[soft]„ÄÇ
tokenize -> ['√§¬ª¬∑√¶≈Ç¬º', '√¶ƒ∫¬Ø', '[num]', '√Ø¬ºƒÆ√§¬Ωƒ®', '√®¬¥¬®', '√•ƒæ¬∞', '√•¬æƒ™', '[soft]', '√£ƒ¢ƒ§']
ids -> [98580, 21043, 128256, 102378, 103706, 30590, 101600, 128257, 1811]
decode -> ‰ª∑Ê†ºÊòØ[num]Ôºå‰ΩÜË¥®Âú∞Âæà[soft]„ÄÇ

Âè•Â≠ê: ËØ∑Êää[num]ÊõøÊç¢ÊàêÂÆûÈôÖÊï∞Â≠óÔºåÊää[soft]ÊõøÊç¢ÊàêÊüîËΩØÂ∫¶„ÄÇ
tokenize -> ['√®¬Ø¬∑', '√¶ƒ¨ƒ¨', '[num]', '√¶ƒΩ¬ø', '√¶ƒØ¬¢', '√¶ƒ™ƒ≤', '√•¬Æ≈Ä√©ƒªƒß', '√¶ƒ∑¬∞√•≈Éƒπ', '√Ø¬ºƒÆ√¶ƒ¨ƒ¨', '[soft]', '√¶ƒΩ¬ø', '√¶ƒØ¬¢', '√¶ƒ™ƒ≤', '√¶≈Åƒ∂', '√®¬Ω¬Ø', '√•¬∫¬¶', '√£ƒ¢ƒ§']
ids -> [15225, 102178, 128256, 109913, 72234, 13153, 115827, 83687, 117424, 128257, 109913, 72234, 13153, 115289, 65372, 27479, 1811]
decode -> ËØ∑Êää[num]ÊõøÊç¢ÊàêÂÆûÈôÖÊï∞Â≠óÔºåÊää[soft]ÊõøÊç¢ÊàêÊüîËΩØÂ∫¶„ÄÇ


In [None]:
text = '''
Available models: 
['{\n  "description": "Bayesian Personalized Ranking, a classic pairwise ranking method based on matrix factorization. It focuses on modeling user preference orderings.",\n  "when_to_use": "Use when the task involves general recommendation based on long-term user preferences, without considering sequence order. Suitable for implicit feedback like clicks or likes.",\n  "input": "A user-item interaction matrix or embeddings representing user and item factors.",\n  "output": "Top-K candidate items ranked by the user\'s overall preference."\n}', '{\n  "description": "A Transformer-based sequential recommendation model (Self-Attentive Sequential Recommendation). It captures the order and short-term interest patterns from the user\'s recent interactions.",\n  "when_to_use": "Use when the task requires modeling the sequence of recent user interactions or short-term preferences. For example, predicting the next item a user might click or watch.",\n  "input": "A chronologically ordered sequence of user-item interactions.",\n  "output": "Top-K candidate items predicted as the next likely interactions."\n}', '{\n  "description": "Factorizing Personalized Markov Chains, a hybrid model combining matrix factorization (long-term user preferences) with first-order Markov chains (short-term sequential patterns). It predicts the next item by considering both user embedding and the transition from the last interacted item.",\n  "when_to_use": "Use when the recommendation task involves next-item prediction or session-based recommendation, where both long-term preferences and recent sequential behavior matter.",\n  "input": "User embedding (long-term preference) and the last interacted item (short-term context).",\n  "output": "Top-K candidate items predicted as the user\'s next likely interaction."\n}', '{\n  "description": "A simple non-personalized baseline that recommends items purely based on their overall popularity (e.g., number of interactions).",\n  "when_to_use": "Use as a baseline for comparison or in cold-start situations where user-specific data is not available.",\n  "input": "Global item interaction counts or frequencies.",\n  "output": "Top-K items ranked by overall popularity."\n}', '{\n  "description": "An item-based collaborative filtering model that recommends items similar to those a user has already interacted with, using item-to-item similarity (e.g., cosine similarity, Jaccard).",\n  "when_to_use": "Use when item similarity can effectively capture user preference patterns. Works well in scenarios like e-commerce or content platforms where co-purchase or co-view signals are strong.",\n  "input": "Item-item similarity matrix built from historical user-item interactions.",\n  "output": "Top-K items most similar to the user\\u2019s past interacted items."\n}']
User Profile:
{
    "purchased item numbers": 36,
    "purchase history": [
        {
            "categories": "All Beauty",
            "average_rating": 4.3,
            "rating_number": 535,
            "price": 7.49,
            "rating": 3.0,
            "timestamp": 1562688988349.0
        },
        {
            "categories": "All Beauty",
            "average_rating": 3.8,
            "rating_number": 38,
            "price": 9.99,
            "rating": 5.0,
            "timestamp": 1570227921988.0
        },
        {
            "categories": "All Beauty",
            "average_rating": 4.2,
            "rating_number": 63,
            "price": NaN,
            "rating": 4.0,
            "timestamp": 1580159586262.0
        },
        {
            "categories": "All Beauty",
            "average_rating": 4.1,
            "rating_number": 81,
            "price": NaN,
            "rating": 4.0,
            "timestamp": 1580344556353.0
        },
        {
            "categories": "All Beauty",
            "average_rating": 4.1,
            "rating_number": 213,
            "price": 15.97,
            "rating": 4.0,
            "timestamp": 1580931456931.0
        }
    ]
}
Expected output format example:
{
  "bpr": {
    "top-k": "integer between 1 and 500",
    "score-weight": "float between 0 and 1"
  },
  "sasrec": {
    "top-k": "integer between 1 and 500",
    "score-weight": "float between 0 and 1"
  },
  "fpmc": {
    "top-k": "integer between 1 and 500",
    "score-weight": "float between 0 and 1"
  },
  "pop": {
    "top-k": "integer between 1 and 500",
    "score-weight": "float between 0 and 1"
  },
  "itemknn": {
    "top-k": "integer between 1 and 500",
    "score-weight": "float between 0 and 1"
  }
}

Please output the JSON file containing the usage of ALL availablemodels.Your JSON response:
'''
tokens = tok(text, padding=False, return_tensors=None)
actual_length = len(tokens["input_ids"])
print(f"TokenÊï∞Èáè: {actual_length}")

TokenÊï∞Èáè: 5


In [13]:
from datetime import datetime
datetime.fromtimestamp(1545397523659.0 / 1000).strftime('%Y-%m-%d %H:%M:%S')

'2018-12-21 13:05:23'