## Synthetic Review Generation with GPT-2

### Load Model

In [1]:
from transformers import GPT2LMHeadModel,  GPT2Tokenizer, GPT2Config, GPT2LMHeadModel, set_seed
import torch
import pandas as pd

In [2]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [4]:
# Restore the fine-tuned GPT-2 checkpoint; the tokenizer (vocabulary) is stored in the same directory
output_dir = "/kaggle/input/gpt2-medium-test/GPT2-medium"
tokenizer = GPT2Tokenizer.from_pretrained(output_dir)
model = GPT2LMHeadModel.from_pretrained(output_dir)
# Move the weights to the selected device; as the last expression this also shows the architecture
model.to(device)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50259, 1024)
    (wpe): Embedding(1024, 1024)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-23): 24 x GPT2Block(
        (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=1024, out_features=50259, bias=False)
)

### Try different prompts

In [5]:
# Accumulates [prompt, generated review] pairs across repeated runs of the test cell below
results_tested_prompts: list[list[str]] = []

In [6]:
# Prepare a single trial run: put the model into inference mode and choose the prompt to test
model.eval() # set model in evaluation mode (dropout layers are inactive during generation)

prompt_test = "I bought a T-shirt." # Enter the prompt to try; re-run the next cell after changing it

In [8]:
# Tokenize the prompt and move the tensors to the model's device
inputs_test = tokenizer(prompt_test, return_tensors="pt").to(device)

print(inputs_test)

# Number of prompt tokens. generate() returns prompt + continuation for this
# decoder-only model, so everything after this offset is newly generated text.
prompt_length = inputs_test['input_ids'].shape[1]

# Generate the reviews
outputs_test = model.generate(
    inputs_test['input_ids'],
    attention_mask=inputs_test['attention_mask'],  # add attention mask
    do_sample=True,
    max_length=300,  # in reviews that are too long, it seems to generate contradictory opinions about the T-shirt
    min_length=50,
    top_k=50,
    top_p=0.8,
    num_return_sequences=3,
    pad_token_id=tokenizer.eos_token_id,  # avoid a warning about the missing pad token
)

# Store results in the list to have an overview of the already tested prompts
# with the corresponding reviews
for output in outputs_test:
    # Drop the prompt at token level instead of slicing the decoded string by
    # len(prompt_test): the decoded prompt is not guaranteed to match the
    # original text character for character.
    generated_review = tokenizer.decode(
        output[prompt_length:], skip_special_tokens=True
    ).strip()
    results_tested_prompts.append([prompt_test, generated_review])

# Print the reviews added by this run, numbered by their position in the overall list
num_new_reviews = len(outputs_test)
first_review_number = len(results_tested_prompts) - num_new_reviews + 1
for review_number, (_, review) in enumerate(
    results_tested_prompts[-num_new_reviews:], first_review_number
):
    print(f"Review {review_number}: {review}\n")

{'input_ids': tensor([[   40,  5839,   257,   309,    12, 15600,    13]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}
Review 1: Got a light grey shirt with a huge white patch. Looks nothing like the picture. I still kept it because it’s still cute and dute. Would definitely recommend. Took a chance on getting the white one.

Review 2: It is not a T-shirt, but more of a T-shirt.  That being said, it is a little small in the chest area.  I will wear it with a camisole and a tank top underneath, but it is not what I expected.  I wanted a white cardigan, which is my usual white white shirt with lace and red lace in the front.

Review 3: I love the shirt. Fit well, but the material is not the softest. It has a shine to it. It’s a bit thin, but it’s not see through. I would buy it again, but I’d probably not buy it again.



In [9]:
# Tabulate every tested prompt next to the review it produced (one row per generated review)
df_tested_prompts = pd.DataFrame(
    data=results_tested_prompts,
    columns=["Prompt", "Generated Review"],
)
df_tested_prompts

Unnamed: 0,Prompt,Generated Review
0,I bought a T-shirt.,Got a light grey shirt with a huge white patch...
1,I bought a T-shirt.,"It is not a T-shirt, but more of a T-shirt. T..."
2,I bought a T-shirt.,"I love the shirt. Fit well, but the material i..."


In [10]:
# Persist the prompt experiments as CSV; the row index is left out of the file
df_tested_prompts.to_csv(path_or_buf='GPT2_prompts.csv', index=False)

### Generate the Sample

The sample consists of 10,000 generated reviews: each of the five prompts is used to generate 2,000 reviews. From each of these subsamples, 20 reviews are drawn at random, which gives a sample of 100 reviews for the human evaluation.

In [None]:
# Generate the Reviews

model.eval()  # set model in evaluation mode

# Seed the random number generators so the sample can be regenerated
# (same seed value as the random_state used for the human-evaluation draw).
set_seed(42)

# Define the prompts
prompts = [
    "I bought a T-shirt.",  # Prompt 1
    "This is my second time purchasing this T-shirt.",  # Prompt 2
    "The T-shirt I ordered was too small.",  # Prompt 3
    "I love the design of this T-shirt.",  # Prompt 4
    "This T-shirt is perfect for casual wear.",  # Prompt 5
]

# 200 batches x 10 sequences = 2000 reviews per prompt (10,000 reviews in total)
batches_per_prompt = 200
reviews_per_batch = 10

# List to store results as [prompt label, prompt, generated review]
results = []

for prompt_number, prompt in enumerate(prompts, 1):
    # Tokenize once per prompt: the encoding is identical for every batch
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # generate() returns prompt + continuation; remember where the continuation starts
    prompt_length = inputs['input_ids'].shape[1]

    for _ in range(batches_per_prompt):
        # Generate the output
        outputs = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],  # add attention mask
            do_sample=True,
            max_length=300,  # in reviews that are too long, it seems to generate contradictory opinions about the T-shirt
            min_length=50,
            top_k=50,
            top_p=0.8,
            num_return_sequences=reviews_per_batch,
            pad_token_id=tokenizer.eos_token_id,  # avoid a warning about the missing pad token
        )

        # Store all generated sequences in the list
        for output in outputs:
            # Remove the prompt at token level rather than slicing the decoded
            # string by len(prompt), which relies on the decoded prompt matching
            # the original text character for character.
            generated_review = tokenizer.decode(
                output[prompt_length:], skip_special_tokens=True
            ).strip()
            results.append([f'Prompt {prompt_number}', prompt, generated_review])


In [13]:
# Assemble the full sample: one row per generated review, labelled with the prompt that produced it
df = pd.DataFrame(
    data=results,
    columns=["Prompt Number", "Prompt", "Generated Review"],
)
df

Unnamed: 0,Prompt Number,Prompt,Generated Review
0,Prompt f1,I bought a T-shirt.,It’s very soft and comfortable. Washed nicely....
1,Prompt f1,I bought a T-shirt.,"Great material, excellent quality. Fit is as ..."
2,Prompt f1,I bought a T-shirt.,Got an empty package. I threw it in the trash...
3,Prompt f1,I bought a T-shirt.,Got a tee shirt. It's a little tight across th...
4,Prompt f1,I bought a T-shirt.,"It looks nice, but is made of a shiny material..."
...,...,...,...
9995,Prompt f5,This T-shirt is perfect for casual wear.,Comfortable and soft material. Washes well. No...
9996,Prompt f5,This T-shirt is perfect for casual wear.,"It’s lightweight, breathable and has the right..."
9997,Prompt f5,This T-shirt is perfect for casual wear.,Lightweight and comfy. Great for any work out....
9998,Prompt f5,This T-shirt is perfect for casual wear.,I got a large as I am 5'7&#34; and about 135 l...


In [14]:
# Show the first generated review in full (the table view above truncates long text)
df.loc[0, 'Generated Review']

'It’s very soft and comfortable. Washed nicely. Got a lot of compliments. It’s comfortable. A little longer than I like. But it’s not terrible. It’s a little shorter than I expected.'

In [39]:
# Write the complete generated sample to disk as CSV, omitting the row index
df.to_csv(path_or_buf='GPT2_sample.csv', index=False)

### Sample for the Human Evaluation

In [16]:
# Draw 20 reviews from each prompt's subsample; the fixed random_state makes the draw repeatable
df_human_eval = df.groupby('Prompt Number').sample(n=20, random_state=42)

# Save the sampled DataFrame to a CSV file
df_human_eval.to_csv('GPT2_human_evaluation.csv', index=False)

# Keep the DataFrame as the cell's last expression so the notebook renders it;
# a bare expression in the middle of a cell is not displayed.
df_human_eval

Unnamed: 0,Prompt Number,Prompt,Generated Review
1860,Prompt f1,I bought a T-shirt.,Came with a hole in it. I had to return it and...
353,Prompt f1,I bought a T-shirt.,Got a T-shirt dress. Yuck. I could have gotten...
1333,Prompt f1,I bought a T-shirt.,It's super cute and super soft. I like it. It'...
905,Prompt f1,I bought a T-shirt.,Got a 2T for my son who is between kid and adu...
1289,Prompt f1,I bought a T-shirt.,It was long enough and the fabric is very soft...
...,...,...,...
8184,Prompt f5,This T-shirt is perfect for casual wear.,It is thin and soft and comfortable. I love th...
8785,Prompt f5,This T-shirt is perfect for casual wear.,I got it for my son who wears a large. This t-...
9887,Prompt f5,This T-shirt is perfect for casual wear.,Material was thin but not see through. I orde...
9227,Prompt f5,This T-shirt is perfect for casual wear.,T-shirt is slightly tight but will work fine f...
