In [1]:
%pip  install accelerate peft bitsandbytes transformers trl pynvml

Looking in indexes: http://mirrors.aliyun.com/pypi/simple
[0mNote: you may need to restart the kernel to use updated packages.


In [2]:
import subprocess
import os

# 访问huggingfase 
result = subprocess.run('bash -c "source /etc/network_turbo && env | grep proxy"', shell=True, capture_output=True, text=True)
output = result.stdout
for line in output.splitlines():
    if '=' in line:
        var, value = line.split('=', 1)
        os.environ[var] = value

In [3]:
import os
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig
from trl import SFTTrainer
from tqdm import tqdm
import torch
import time
import pandas as pd
import numpy as np

  import pynvml  # type: ignore[import]


In [4]:
!nvidia-smi

Thu Feb 26 23:01:55 2026       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.78                 Driver Version: 550.78         CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4090 D      On  |   00000000:A8:00.0 Off |                  Off |
| 30%   28C    P8             13W /  425W |       4MiB /  24564MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [5]:
# 数据
# https://huggingface.co/datasets/neil-code/dialogsum-test

from datasets import load_dataset
dataset = load_dataset("./data/neil-code_dialogsum-test")

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

In [6]:
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 1999
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 499
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 499
    })
})

In [7]:
dataset['train'][0]

{'id': 'train_0',
 'dialogue': "#Person1#: Hi, Mr. Smith. I'm Doctor Hawkins. Why are you here today?\n#Person2#: I found it would be a good idea to get a check-up.\n#Person1#: Yes, well, you haven't had one for 5 years. You should have one every year.\n#Person2#: I know. I figure as long as there is nothing wrong, why go see the doctor?\n#Person1#: Well, the best way to avoid serious illnesses is to find out about them early. So try to come at least once a year for your own good.\n#Person2#: Ok.\n#Person1#: Let me see here. Your eyes and ears look fine. Take a deep breath, please. Do you smoke, Mr. Smith?\n#Person2#: Yes.\n#Person1#: Smoking is the leading cause of lung cancer and heart disease, you know. You really should quit.\n#Person2#: I've tried hundreds of times, but I just can't seem to kick the habit.\n#Person1#: Well, we have classes and some medications that might help. I'll give you more information before you leave.\n#Person2#: Ok, thanks doctor.",
 'summary': "Mr. Smith'

dialogue 和 summary.。⼀个作为输⼊，另外⼀个作为输出。 但我们要
finetune 的是 LLM，格式是要满⾜ instruction data 的格式。 如果是传统 NLP 任务，我们实际上可以
把<dialogue, summary>直接作为输⼊和输出送到模型⾥来处

可以设计prompt

Give the conversation, extract the main points and summarize the conversions
{dialogue}
{summary}理。

In [8]:
def create_prompt_formats(sample):
    """
    Format various fields of the sample ('instruction','output')
    :param sample: input data
    """
    INTRO_BLURB = "Instruct: Below is an instruction that describes a task. Write a response that appropriately completes the request."
    INSTRUCTION_KEY = "Input: Please Summarize the below conversation."
    RESPONSE_KEY = "Output:"
    
    blurb = f"\n{INTRO_BLURB}"
    instruction = f"{INSTRUCTION_KEY}"
    input_context = f"{sample['dialogue']}" if sample["dialogue"] else None
    response = f"{RESPONSE_KEY}\n{sample['summary']}"
    
    parts = [part for part in [blurb, instruction, input_context, response] if part]
    formatted_prompt = "\n\n".join(parts)
    sample["text"] = formatted_prompt
    return sample

In [9]:
print(create_prompt_formats(dataset['train'][0])['text'])


Instruct: Below is an instruction that describes a task. Write a response that appropriately completes the request.

Input: Please Summarize the below conversation.

#Person1#: Hi, Mr. Smith. I'm Doctor Hawkins. Why are you here today?
#Person2#: I found it would be a good idea to get a check-up.
#Person1#: Yes, well, you haven't had one for 5 years. You should have one every year.
#Person2#: I know. I figure as long as there is nothing wrong, why go see the doctor?
#Person1#: Well, the best way to avoid serious illnesses is to find out about them early. So try to come at least once a year for your own good.
#Person2#: Ok.
#Person1#: Let me see here. Your eyes and ears look fine. Take a deep breath, please. Do you smoke, Mr. Smith?
#Person2#: Yes.
#Person1#: Smoking is the leading cause of lung cancer and heart disease, you know. You really should quit.
#Person2#: I've tried hundreds of times, but I just can't seem to kick the habit.
#Person1#: Well, we have classes and some medicatio

In [10]:
# 配置模型

compute_dtype = getattr(torch, "float16")

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=True,
)

1. compute_dtype = getattr(torch, "float16") :
这⾏代码使⽤ getattr 函数从PyTorch库中获取 float16 数据类型。 float16 是16位浮点
数，它具有较⼩的数值范围和精度，但⽐32位浮点数（ float32 ）占⽤更少的内存和计算资
源。在量化过程中，使⽤ float16 可以减少模型的内存占⽤和加速计算。
2. quant_config = BitsAndBytesConfig(...) :
BitsAndBytesConfig 是 BitsAndBytes 库中⽤于配置量化参数的类。
3. load_in_4bit=True :
这个参数指定模型在加载时是否使⽤4位量化。设置为 True 意味着模型在加载会进⾏4位量
化处理。
4. bnb_4bit_quant_type="nf4" :
这个参数定义了4位量化的类型。 "nf4" 代表"NormalFloat 4"量，这⼀种⾮功能性 的量
化⽅法，通常⽤于测试和实验，因为它不会改变模型的权重值。
5. bnb_4bit_compute_dtype=compute_dtype :
这个参数设置了在4位量化计算时使⽤的数值类型。这⾥使⽤了之前定义的 ompute_dtype
，即 float16 。这意味着在进⾏4位量化计算时，会使⽤16位浮点数来处理
数值。
6. bnb_4bit_use_double_quant=True :
这个参数指定是否使⽤双重量化。双重量化是⼀种技术可以在不损失太多精度的情况下进⼀
步减少模型⼤⼩。双重量化是为了进⼀步量化 quantization consant，也是 Qlora ⾥⾯提出
的⼀种⽅式。

In [11]:
# 模型加载

model_path = "/root/autodl-tmp/LLM-Research/Meta-Llama-3-8B-Instruct/"

original_model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=compute_dtype,
    device_map={"": 0},
    quantization_config=quant_config
)

`torch_dtype` is deprecated! Use `dtype` instead!


Loading weights:   0%|          | 0/291 [00:00<?, ?it/s]

model_path:这⾥我们指定了提前下载好的模型的本地路径，当然如果⽹络通畅的话，也可以将
model_path替换为huggingface上的模型名字，这样可以⾃动下载到本地。
device_map：⽤于定义模型参数应该放置在哪个计算卡上。字典的键是模型的参数名前缀，值是
设备的编号。在这个例⼦中，键是⼀个空字符串""，它通常代表所有的模型参数，⽽值 0 通常代表
第⼀个可⽤的 GPU。这意味着所有模型参数都应该放在编号为 0 的 GPU 上。如果有多个 GPU，
可以通过这个映射来指定不同的参数应该放在哪个 GPU 上，以实现模型的并⾏计算。
quantization_config=quant_config: 使⽤ 4 位量化配置，意味着模型在加载时使⽤这量化设置，
以优化内存使⽤和可能的计算效率。

In [12]:
!nvidia-smi

Thu Feb 26 23:12:40 2026       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.78                 Driver Version: 550.78         CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4090 D      On  |   00000000:A8:00.0 Off |                  Off |
| 30%   29C    P8             13W /  425W |    6525MiB /  24564MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [13]:
# 加载 tokenizer

tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False, trust_remote_code=True, padding_side=
"left",add_eos_token=True,add_bos_token=True)
tokenizer.pad_token_id = tokenizer.eos_token_id

padding_side=left 这个参数指定了在序列⻓度不⾜时，在序列的哪⼀侧添加填充
（padding）。在这⾥设置为"left"意味着填充将被添加到序列的左侧。⽐如我们在进⾏ mini-batch
训练的时候，为了达到最好的训练效率，会把 mini-batch ⾥⾯的 input⻓度弄成⼀样的。 ⼀的
操作是在 mini-batch 中，假如有 10 个不同的 input，⽽且每个 input ⻓度不⼀样，这时候以选
择最⻓的作为标准，对于剩下的 input，⻓度不⾜的部分⽤ padding token 来填充，可以在序列的
右边添加，也可以在左边添加。
add_eos_token=True: 这个参数指示分词器在序列的末尾添加⼀个结束符（EOS， nd Of
Sentence）。add_bos_token=True: 这个参数指示分词器在序列的开头添加⼀个开始符（BOS，
Beginning Of Sentence）。 ⽐如⼀句话“I love this cat”, 添加完之后变成 <bos>I love this
cat<eos> 。
use_fast=False: 这个参数指定是否使⽤分词器的快速版本。设置为 False 意味着将使⽤分词器的标
准版本，这通常是基于 Python 的。快速分词器通常是基于 Rust 编写的提供更好的性能和额外
的功能，但在某些情况下可能不⽀持某些特殊的⾃定义⾏为。
tokenizer.pad_token = tokenizer.eos_token: 这⾏代码将分词器的填充令牌（pad_token）设置为
与结束符令牌（eos_token）相同。这是必要的。

In [14]:
# 模型测试
eval_tokenizer = AutoTokenizer.from_pretrained(model_path, add_bos_token=True, trust_remote_code=True, use_fast=False)
eval_tokenizer.pad_token_id = eval_tokenizer.eos_token_id
def gen(model,p, maxlen=100, sample=True):
    toks = eval_tokenizer(p, return_tensors="pt")
    res = model.generate(**toks.to("cuda"), max_new_tokens=maxlen, do_sample=sample,num_return_sequences=1,temperature=0.1,num_beams=1,top_p=0.95,).to('cpu')
    return eval_tokenizer.batch_decode(res,skip_special_tokens=True)

toks = eval_tokenizer(p, return_tensors="pt")：

p: prompt,  return_tensors=”pt”指的是分词器返回的编码

res = model.generate(**toks.to("cuda"), max_new_tokens=maxlen, do_sample=sample,num_return_sequences=1,temperature=0.1,num_beams=1,top_p=0.95,).to('cpu')

- **toks.to("cuda")：将张量移至GPU进行加速运算
- max_new_tokens=maxlen：生成最大token数
- do_sample=sample：是否在生成时采样，如果为 True，则在⽣成时使⽤概率分布，这使得⽣成的
⽂本更多样化
- num_return_sequences=1：生成序列数量
- temperature=0.1: 控制⽣成过程中随机性的温度参数，较低的值（如 0.1）会导致更确定性的（也
就是说，更少随机性的）输出
- num_beams=1: 使⽤的 beam search 的 beam 数量，设置为 1 意味着不使⽤beam search。
- top_p=0.95: 使⽤ nucleus sampling 时保留的累积概率分布的部分。⽐如按照单词分布的⼤⼩排序
之后：单词 1: 概率 1， 单词 2:概率 2， 单词 3:概率 3 … 如果概率 1+概率 2+…概率 20，好
>=0.95, 那我们就从前 20 个单词中进⾏采样。。。


In [15]:
%%time
from transformers import set_seed
seed = 42
set_seed(seed)
index = 10

prompt = dataset['test'][index]['dialogue']
summary = dataset['test'][index]['summary']
formatted_prompt = f"Instruct: Summarize the following conversation.\nInput:{prompt}\nOutput:\n"
res = gen(original_model,formatted_prompt,200,)
#print(res[0])
output = res[0].split('Output:\n')[1]
dash_line = '-'.join('' for x in range(100))
print(dash_line)
print(f'INPUT PROMPT:\n{formatted_prompt}')
print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
print(dash_line)
print(f'MODEL GENERATION - ZERO SHOT:\n{output}')

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


---------------------------------------------------------------------------------------------------
INPUT PROMPT:
Instruct: Summarize the following conversation.
Input:#Person1#: Happy Birthday, this is for you, Brian.
#Person2#: I'm so happy you remember, please come in and enjoy the party. Everyone's here, I'm sure you have a good time.
#Person1#: Brian, may I have a pleasure to have a dance with you?
#Person2#: Ok.
#Person1#: This is really wonderful party.
#Person2#: Yes, you are always popular with everyone. and you look very pretty today.
#Person1#: Thanks, that's very kind of you to say. I hope my necklace goes with my dress, and they both make me look good I feel.
#Person2#: You look great, you are absolutely glowing.
#Person1#: Thanks, this is a fine party. We should have a drink together to celebrate your birthday
Output:

---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
#Person1# attends Brian's birth

In [16]:
# 数据预处理

def get_max_length(model):
    conf = model.config
    max_length = None
    for length_setting in ["n_positions", "max_position_embeddings", "seq_length"]:
        max_length = getattr(model.config, length_setting, None)
        if max_length:
            print(f"Found max lenth: {max_length}")
            break
        if not max_length:
            max_length = 1024
        print(f"Using default max length: {max_length}")
    return max_length

def preprocess_batch(batch, tokenizer, max_length):
    """
    Tokenizing a batch
    """
    return tokenizer( 
        batch["text"],
        max_length=max_length,
        truncation=True,
    )

In [17]:
from functools import partial
def preprocess_dataset(tokenizer: AutoTokenizer, max_length: int,seed, dataset): 
    # Add prompt to each sample
    print("Preprocessing dataset...")
    dataset = dataset.map(create_prompt_formats)
    _preprocessing_function = partial(preprocess_batch, max_length=max_length, tokenizer=tokenizer)
    dataset = dataset.map(
        _preprocessing_function,
        batched=True,
        remove_columns=['id', 'topic', 'dialogue', 'summary'],
    )
    # 过滤样本
    dataset = dataset.filter(lambda sample: len(sample["input_ids"]) < max_length)
    # Shuffle 数据
    dataset = dataset.shuffle(seed=seed)
    print("Preprocessing dataset done.")
    return dataset

In [18]:
max_length = get_max_length(original_model)
print(max_length)
train_dataset = preprocess_dataset(tokenizer, max_length,seed, dataset['train'])
eval_dataset = preprocess_dataset(tokenizer, max_length,seed, dataset['validation'])

Using default max length: 1024
Found max lenth: 8192
8192
Preprocessing dataset...


Map:   0%|          | 0/1999 [00:00<?, ? examples/s]

Map:   0%|          | 0/1999 [00:00<?, ? examples/s]

Filter:   0%|          | 0/1999 [00:00<?, ? examples/s]

Preprocessing dataset done.
Preprocessing dataset...


Map:   0%|          | 0/499 [00:00<?, ? examples/s]

Map:   0%|          | 0/499 [00:00<?, ? examples/s]

Filter:   0%|          | 0/499 [00:00<?, ? examples/s]

Preprocessing dataset done.


In [19]:
train_dataset

Dataset({
    features: ['text', 'input_ids', 'attention_mask'],
    num_rows: 1999
})

In [20]:
# 模型实例化
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
config = LoraConfig(
    r=64, #Rank
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    bias="none",
    lora_dropout=0.01, # Conventional
    task_type="CAUSAL_LM",
)
# 1 - Enabling gradient checkpointing to reduce memory usage during fine-tuning
original_model.gradient_checkpointing_enable()
# 2 - Using the prepare_model_for_kbit_training method from PEFT
original_model = prepare_model_for_kbit_training(original_model)
peft_model = get_peft_model(original_model, config)

In [25]:
output_dir = './peft-dialogue-summary-training/final-checkpoint'
import transformers
peft_training_args = TrainingArguments(
    output_dir = output_dir,
    warmup_steps=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    max_steps=2000,
    learning_rate=2e-4,
    optim="paged_adamw_8bit",
    logging_steps=100,
    logging_dir="./logs",
    save_strategy="steps",
    save_steps=100,
    eval_strategy="steps",
    eval_steps=100,
    do_eval=True,
    gradient_checkpointing=True,
    report_to="none"
)
peft_model.config.use_cache = False
peft_trainer = transformers.Trainer(
    model=peft_model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    args=peft_training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

`logging_dir` is deprecated and will be removed in v5.2. Please set `TENSORBOARD_LOGGING_DIR` instead.


In [26]:
torch.cuda.empty_cache()
peft_trainer.train()

Step,Training Loss,Validation Loss
100,1.542025,1.480162
200,1.346351,1.458464
300,1.370595,1.447223
400,1.360508,1.445439
500,1.335817,1.440325
600,1.39084,1.43642
700,1.35933,1.434214
800,1.306064,1.431995
900,1.354087,1.430049
1000,1.279286,1.427958


TrainOutput(global_step=2000, training_loss=1.3324966049194336, metrics={'train_runtime': 1652.8695, 'train_samples_per_second': 1.21, 'train_steps_per_second': 1.21, 'total_flos': 2.380351562177741e+16, 'train_loss': 1.3324966049194336, 'epoch': 1.0005002501250626})

In [27]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\n \
                all model parameters: {all_model_params}\n \
                percentage of trainable model parameters:  \
                {100 * trainable_model_params / all_model_params:.2f}%"

In [28]:
print_number_of_trainable_model_parameters(original_model)

'trainable model parameters: 54525952\n                 all model parameters: 4595126272\n                 percentage of trainable model parameters:                  1.19%'

In [29]:
# Free memory 
del original_model
del peft_trainer
torch.cuda.empty_cache()

In [30]:
!nvidia-smi

Fri Feb 27 00:22:35 2026       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.78                 Driver Version: 550.78         CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4090 D      On  |   00000000:A8:00.0 Off |                  Off |
| 30%   28C    P8             13W /  425W |   10781MiB /  24564MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [31]:
compute_dtype = getattr(torch, "float16")

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=True,
)
model_path = "/root/autodl-tmp/LLM-Research/Meta-Llama-3-8B-Instruct/"
base_model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=compute_dtype,
    device_map={"": 0},
    quantization_config=quant_config
)

Loading weights:   0%|          | 0/291 [00:00<?, ?it/s]

In [32]:
from peft import PeftModel
ft_model = PeftModel.from_pretrained(
    base_model, 
    "./peft-dialogue-summary-training/final-checkpoint/checkpoint-500",
    torch_dtype=torch.float16,
    is_trainable=False
)

In [33]:
!nvidia-smi

Fri Feb 27 00:23:33 2026       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.78                 Driver Version: 550.78         CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4090 D      On  |   00000000:A8:00.0 Off |                  Off |
| 30%   31C    P2             50W /  425W |   17911MiB /  24564MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [34]:
eval_tokenizer = AutoTokenizer.from_pretrained(model_path, add_bos_token=True,
trust_remote_code=True, use_fast=False)
eval_tokenizer.pad_token = eval_tokenizer.eos_token

In [35]:
def gen(model,p, maxlen=100, sample=True):
    toks = eval_tokenizer(p, return_tensors="pt")
    res = model.generate(**toks.to("cuda"), max_new_tokens=maxlen, do_sample=sample,num_return_sequences=1,temperature=0.5,num_beams=1,top_p=0.95,).to('cpu')
    return eval_tokenizer.batch_decode(res,skip_special_tokens=True)

In [36]:
%%time
index = 10

prompt = dataset['test'][index]['dialogue']
summary = dataset['test'][index]['summary']
formatted_prompt = f"Instruct: Summarize the following conversation.\nInput:{prompt}\nOutput:\n"
res = gen(ft_model,formatted_prompt,200,)
#print(res[0])
output = res[0].split('Output:\n')[1]
dash_line = '-'.join('' for x in range(100))
print(dash_line)
print(f'INPUT PROMPT:\n{formatted_prompt}')
print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
print(dash_line)
print(f'PEFT MODEL GENERATION:\n{output}')

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


---------------------------------------------------------------------------------------------------
INPUT PROMPT:
Instruct: Summarize the following conversation.
Input:#Person1#: Happy Birthday, this is for you, Brian.
#Person2#: I'm so happy you remember, please come in and enjoy the party. Everyone's here, I'm sure you have a good time.
#Person1#: Brian, may I have a pleasure to have a dance with you?
#Person2#: Ok.
#Person1#: This is really wonderful party.
#Person2#: Yes, you are always popular with everyone. and you look very pretty today.
#Person1#: Thanks, that's very kind of you to say. I hope my necklace goes with my dress, and they both make me look good I feel.
#Person2#: You look great, you are absolutely glowing.
#Person1#: Thanks, this is a fine party. We should have a drink together to celebrate your birthday
Output:

---------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
#Person1# attends Brian's birth