# Synthetic Review Generation with GPT-2

### Load Model

In [1]:
# Load the necessary packages 

import torch
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed

# Seed the random number generators through transformers' set_seed so that
# results are reproducible and consistent between runs.
SEED = 42
set_seed(SEED)

2024-08-20 10:51:54.342493: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-20 10:51:54.342612: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-20 10:51:54.476418: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Pick the compute device: use the GPU when PyTorch reports a usable CUDA device,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Echo the chosen device as the cell output.
device

'cuda'

In [3]:
# The tokenizer and the model weights are loaded from the same Hugging Face checkpoint.
model_name = "TomData/GPT2-review"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Move the model to the selected device. This call is the last expression of the
# cell, so its return value (the model) is shown as the cell output.
model.to(device)

tokenizer_config.json:   0%|          | 0.00/873 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/999k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/51.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/119 [00:00<?, ?B/s]

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50259, 1024)
    (wpe): Embedding(1024, 1024)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-23): 24 x GPT2Block(
        (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=1024, out_features=50259, bias=False)
)

### Try different prompts

In [4]:
# Accumulator for the prompt experiments: the generation cell appends one row per
# generated review. Re-running this cell resets it to an empty list.
results_tested_prompts = list()

In [10]:
# Put the model in evaluation mode before generating.
model.eval()

# Candidate prompts for generating reviews. Exactly one is active at a time; the
# alternatives stay commented out so it remains traceable which prompts were tried.

#prompt_test = "T-shirt" 
#prompt_test = "The T-shirt is "
#prompt_test = "Generate a product review about a T-shirt. Review:"
#prompt_test = "I bought a T-shirt."
#prompt_test = "You can find a wide variety of T-shirts on Amazon. I recently ordered one, and it has finally arrived, allowing me to see it in person."
prompt_test = "I purchased this T-shirt on Amazon recently. Here's my experience so far."


# Encode the prompt as PyTorch tensors, then move them to `device`.
inputs_test = tokenizer(prompt_test, return_tensors="pt")
inputs_test = inputs_test.to(device)

# Show the encoded prompt (token ids and attention mask) as a sanity check.
print(inputs_test)

# Settings for the generation call.
generation_settings = dict(
    attention_mask=inputs_test["attention_mask"],  # mask that belongs to the prompt ids
    do_sample=True,                                # sample to obtain diverse sequences
    top_p=0.95,                                    # nucleus (top-p) sampling
    min_new_tokens=50,                             # each review has at least 50 new tokens
    max_new_tokens=200,                            # longer reviews seemed to produce contradictory opinions about the T-shirt
    num_return_sequences=100,                      # number of reviews generated for this prompt
    pad_token_id=tokenizer.eos_token_id,           # set explicitly to avoid a warning
    eos_token_id=tokenizer.eos_token_id,
)

# Generate the reviews.
outputs_test = model.generate(inputs_test["input_ids"], **generation_settings)

# Helper that scores a token sequence by its perplexity under the given model.
def calculate_perplexity(model, input_ids, attention_mask):
    """Return the perplexity of ``input_ids`` under ``model`` as a Python number.

    The model is called with ``labels=input_ids`` and the ``loss`` attribute of
    its output is exponentiated.

    Args:
        model: Callable that accepts ``input_ids``, ``attention_mask`` and
            ``labels`` keyword arguments and returns an object exposing ``loss``.
        input_ids: Token ids of the sequence to score.
        attention_mask: Attention mask passed along with ``input_ids``.

    Returns:
        ``exp(loss)``, converted with ``.item()``.
    """
    # No gradients are needed for scoring.
    with torch.no_grad():
        model_output = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=input_ids,
        )
    return model_output.loss.exp().item()

# Score every generated sequence and build one row per review for the current prompt.
prompt_length = len(prompt_test)
batch_rows = []
for output in outputs_test:
    # Turn the token ids back into text, dropping special tokens.
    review = tokenizer.decode(output, skip_special_tokens=True)

    # The decoded text starts with the prompt; keep only what follows it.
    generated_review = review[prompt_length:].strip()

    # Re-encode the review on its own so the perplexity covers the generated text only.
    review_inputs = tokenizer(generated_review, return_tensors="pt").to(device)
    perplexity = calculate_perplexity(
        model,
        input_ids=review_inputs['input_ids'],
        attention_mask=review_inputs['attention_mask'],
    )

    batch_rows.append([prompt_test, generated_review, perplexity])

# Mean perplexity over the reviews generated in this run.
total_perplexity = sum(row[2] for row in batch_rows)
mean_perplexity = total_perplexity / len(outputs_test)

# Attach the mean to every row of this run, then add the rows to the shared results list.
for row in batch_rows:
    row.append(mean_perplexity)
results_tested_prompts.extend(batch_rows)

# Preview the last three stored reviews with their individual and mean perplexity.
# NOTE: the names unpacked here (`prompt`, `review`, `perplexity`, `mean_perplexity`)
# stay bound in the notebook namespace after the loop finishes.
for i, (prompt, review, perplexity, mean_perplexity) in enumerate(results_tested_prompts[-3:]):
    print(f"Review {len(results_tested_prompts) - 3 + i}: {review}")
    print(f"Individual Perplexity: {perplexity:.2f}")
    print(f"Mean Perplexity across all reviews: {mean_perplexity:.2f}\n")

{'input_ids': tensor([[   40,  8155,   428,   309,    12, 15600,   319,  6186,  2904,    13,
          3423,   338,   616,  1998,   523,  1290,    13]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}
Review 597: The fabric is a nice, soft, warm, warm and cozy! The white shirt is bright with vibrant colors and the perfect blend of a subtle and bright coral blue. I was worried that when the shirt arrived, and I was happy to see it in time for my wedding but I was very happy with my purchase. I am very happy with this product. It is well made and fits as described, it came in a very nice box and the shipping was just as expected.
Individual Perplexity: 12.13
Mean Perplexity across all reviews: 12.97

Review 598: I have bought this exact same shirt in the past (in the green, blue, camel, purple, white, red, etc.) The material is soft, stretchy, so it's kind of hot and cozy. It's like a soft, stretchy fleece-type T-shirt, 

In [11]:
# Column labels, in the order the values were appended to each result row.
tested_prompt_columns = ["Prompt", "Generated Review", "Perplexity", "Mean Perplexity"]

# Build a table from the accumulated prompt experiments and display it.
df_tested_prompts = pd.DataFrame(data=results_tested_prompts, columns=tested_prompt_columns)
df_tested_prompts

Unnamed: 0,Prompt,Generated Review,Perplexity,Mean Perplexity
0,T-shirt,"for the cost, I didn't expect too thin. But, t...",11.957463,30.992594
1,T-shirt,is okay the color is too orange; not white. Bu...,18.851870,30.992594
2,T-shirt,"is the right length as pictured. Thin, light a...",12.051032,30.992594
3,T-shirt,. This scarf is so delicate and delicate. The ...,22.378967,30.992594
4,T-shirt,. Not as full and a tight-fitting-band is diff...,41.639233,30.992594
...,...,...,...,...
595,I purchased this T-shirt on Amazon recently. H...,This was not the same jacket as the real deal....,12.556925,12.972234
596,I purchased this T-shirt on Amazon recently. H...,I'm so pleased with it and I highly recommend....,11.027205,12.972234
597,I purchased this T-shirt on Amazon recently. H...,"The fabric is a nice, soft, warm, warm and coz...",12.131819,12.972234
598,I purchased this T-shirt on Amazon recently. H...,I have bought this exact same shirt in the pas...,9.560856,12.972234


In [12]:
# Print a random sample of up to 10 generated reviews per tested prompt to check the quality manually.

# Number of reviews to print for each prompt.
sample_size = 10

# Ensure pandas shows the full content of each string
pd.set_option('display.max_colwidth', None)

# Define all prompts to investigate in a list 
prompts = [
    "T-shirt",
    "The T-shirt is ",
    "Generate a product review about a T-shirt. Review:",
    "I bought a T-shirt.",
    "You can find a wide variety of T-shirts on Amazon. I recently ordered one, and it has finally arrived, allowing me to see it in person.",
    "I purchased this T-shirt on Amazon recently. Here's my experience so far."
]

# Loop through each prompt, filter the DataFrame, and print a random sample of its reviews.
# NOTE: the loop variable reuses the name `prompt_test`, so after this cell it holds
# the last entry of `prompts`.
for prompt_test in prompts:
    print(f"Prompt: {prompt_test}")
    
    # Filter rows where the "Prompt" column matches the current prompt.
    filtered_df = df_tested_prompts[df_tested_prompts["Prompt"] == prompt_test]
    
    # Check if the filtered DataFrame is not empty
    if not filtered_df.empty:
        # DataFrame.sample raises ValueError when asked for more rows than are
        # available (without replacement), so cap the request at the number of rows.
        n_rows = min(sample_size, len(filtered_df))
        sampled_df = filtered_df.sample(n=n_rows, random_state=42)  # random_state is used for reproducibility
        
        # Loop through the sampled DataFrame and print each row with a line break
        for index, row in sampled_df.iterrows():
            print(f"Index: {index}")
            print(f"Review: {row['Generated Review']}")
            print(f"Perplexity: {row['Perplexity']:.2f}")
            print(f"Mean Perplexity: {row['Mean Perplexity']:.2f}")
            print("\n" + "-"*40 + "\n")  # Line break and separator between reviews
    else:
        print("No matching entries found for this prompt.")
    
    print("\n" + "-"*80 + "\n")  # Add a separator between outputs for each prompt

Prompt: T-shirt
Index: 83
Review: , super, like a bag, very soft and very soft, perfect for my nephew! I love it! Looks great and is quite comfy. I like that it is lightweight, and the hood is exactly what I expected. I would highly recommend it!
Perplexity: 17.70
Mean Perplexity: 30.99

----------------------------------------

Index: 53
Review: ! Loved the look, but the fit was weird and uncomfortable on my arms. It was very thin nylon, but very cheap plastic. So if you are a lot more heavy and looking for a more lightweight, even with the white leather parts on the edges. The blue were nice, and was great! Super cute and comfy! Just not a fan of the best!
Perplexity: 28.12
Mean Perplexity: 30.99

----------------------------------------

Index: 70
Review: is not as I expected but the design is really off on the shirt. The picture looks better. My friend liked the shirt. We will be looking for a shirt, so disappointing. Now we have to have apron for Halloween! And it is a costume for

In [14]:
# Write the prompt-comparison table to a CSV file; the row index is not written.
tested_prompts_csv = 'GPT2_tested_prompts.csv'
df_tested_prompts.to_csv(tested_prompts_csv, index=False)

### Generate the Sample

The sample consists of 10 000 generated reviews. 

In [15]:
# Generate the reviews for the final sample and score each one by its perplexity.

model.eval() # set model in evaluation mode

# The single prompt used for the whole sample. It is defined here and used
# consistently below, so this cell does not depend on variables left over from
# earlier cells.
prompt = "I purchased this T-shirt on Amazon recently. Here's my experience so far."

# List to store results: one [prompt, generated review, perplexity] row per review
results = []

# Number of total reviews to generate
total_reviews = 10000

# Maximum number of reviews generated per call to model.generate
batch_size = 100

# The prompt never changes, so it is tokenized once instead of once per batch.
inputs = tokenizer(prompt, return_tensors="pt").to(device)

# Generate batch by batch until the requested number of reviews is reached.
# The last batch is smaller when total_reviews is not a multiple of batch_size.
remaining_reviews = total_reviews
while remaining_reviews > 0:
    current_batch_size = min(batch_size, remaining_reviews)

    # Generate the reviews
    outputs = model.generate(
        inputs['input_ids'],
        attention_mask=inputs['attention_mask'],  # add attention mask
        do_sample=True,  # Enable sampling to generate diverse sequences.
        max_new_tokens=200,  # If reviews are too long, they seem to contain contradictory opinions about the T-shirt
        min_new_tokens=50,  # Ensure that the generated review is at least 50 tokens long.
        top_p=0.95,  # Use nucleus sampling (top-p)
        num_return_sequences=current_batch_size,
        pad_token_id=tokenizer.eos_token_id,  # Avoid a warning
        eos_token_id=tokenizer.eos_token_id,
    )

    # Process each generated review to calculate its perplexity and store results
    for output in outputs:
        # Decode the generated output to get the review text
        review = tokenizer.decode(output, skip_special_tokens=True)

        # Extract the generated review by removing the original prompt from the decoded text
        generated_review = review[len(prompt):].strip()

        # Tokenize the generated review for perplexity calculation
        review_inputs = tokenizer(generated_review, return_tensors="pt").to(device)

        # Calculate perplexity for the individual review
        perplexity = calculate_perplexity(model, review_inputs['input_ids'], review_inputs['attention_mask'])

        # Store the prompt, generated review, and its perplexity in the results list
        results.append([prompt, generated_review, perplexity])

    remaining_reviews -= current_batch_size

# Calculate the mean perplexity across the reviews that were actually generated.
total_perplexity = sum(row[2] for row in results)
mean_perplexity = total_perplexity / len(results) if results else float("nan")

# Append the mean perplexity to each review's entry in the results list
for result in results:
    result.append(mean_perplexity)

In [16]:
# Column labels, in the order the values were appended to each result row.
sample_columns = ["Prompt", "Generated Review", "Perplexity", "Mean Perplexity"]

# Build a table from the generated sample and display it.
df = pd.DataFrame(data=results, columns=sample_columns)
df

Unnamed: 0,Prompt,Generated Review,Perplexity,Mean Perplexity
0,I purchased this T-shirt on Amazon recently. Here's my experience so far.,"I don't like wearing a shirt in my back-up to prevent me from behind my neck, a little belly, or around a bit, but otherwise great quality shirt, great buy. I've been wanting something cute to pull over my pants and t-shirt to give me some extra support, so I bought a larger size. This is perfect for layering with a bra and blazer in both front pocket and the back. Love this style and love it.I've never had to go back to wearing it, and again. I've got many compliments on this shirt, many complements and this shirt is of very good quality.",15.337263,12.965813
1,I purchased this T-shirt on Amazon recently. Here's my experience so far.,"The fit, color, the style. What I got was a great fit and a cute shirt. It was very wide and very wide. I wear a 14/16 for a woman, I got this for my 11 yr old sister who typically wears 12. She wears a 13 and I got the xxl, which is what I would normally wear. If you're on the smaller side then, you should have to try the next size up.",13.156591,12.965813
2,I purchased this T-shirt on Amazon recently. Here's my experience so far.,"The picture is misleading. It's not a highlighter, but it's a more high-quality. There are no tag inside it, and it is a heavy knit. It is a cheap thin T-shirt. It's cheap material so it feels like the shirt was made for someone with a very weak necked body. You'll probably need to purchase a girdle skirt or something this probably won't work for you. It does run big, and I purchased an XS. The XL was so big and I can't even button the buttons because they are a little tight on me.The seams at the neckline looked terrible and the button holes were poorly sewn. If the collar hadn't been sewn on, I would have had to pay to ship it back. I am not going to give it 2 stars for the quality. I'm super disappointed and since it is a beautiful shirt, it's a good quality product.",9.852147,12.965813
3,I purchased this T-shirt on Amazon recently. Here's my experience so far.,First one fits and looks great on. I would recommend this item to anyone. The seller was quick too and sent me a replacement. Thank you! They loved the shirt! It washes well and washed up nicely. I highly recommend!!!!!,17.366442,12.965813
4,I purchased this T-shirt on Amazon recently. Here's my experience so far.,"It fits as expected. The colors are vibrant. It is very soft and comfortable, the only thing that I don't like is that the size was spot on, but the back is so small it makes your neck look ridiculous and not flattering at all.",10.177964,12.965813
...,...,...,...,...
9995,I purchased this T-shirt on Amazon recently. Here's my experience so far.,"This tank was very cute and great quality at first glance. I ordered it in a large which usually runs small. Normally I wear a large, so I ordered a Large and it was quite tight. I had to return it. The fit was perfect, the material seemed nice and the neckline was a bit snug. It felt comfortable, but maybe a medium would have been better. I didn't realize the size issue would have lost a lot of weight to the fabric.",9.039225,12.965813
9996,I purchased this T-shirt on Amazon recently. Here's my experience so far.,I have 3 others that are different fabric. This was not a good quality product at all. It's super thin and you can see EVERYTHING. The fit was fine but the material of the sweater was very thin so the sizing wasn't accurate.,14.228069,12.965813
9997,I purchased this T-shirt on Amazon recently. Here's my experience so far.,"In my opinion, I didn't have time to return it to my surprise of the disappointment. It was very comfortable and washes up nicely and does the job. I wore this to the office and was very happy with the workmanship and fit and support of the coat. This will be the only positive I have found that I will probably not get to wear the shirt.",16.189533,12.965813
9998,I purchased this T-shirt on Amazon recently. Here's my experience so far.,"I ended up giving it to my 12 year old granddaughter, but the tag said that it had a hole in the seam right out of the sleeve, which looks like someone tried to pull it down. I'm glad I had really high hopes. Then, the jacket ripped at the bottom seam. So I was disappointed.",11.614868,12.965813


In [18]:
# Write the generated sample to a CSV file; the row index is not written.
sample_csv = 'GPT2_sample.csv'
df.to_csv(sample_csv, index=False)

### Sample for the Human Evaluation

For the human evaluation, we draw a random sample of 100 reviews from the 10 000 reviews generated for the quantitative analysis.

In [19]:
# Draw 100 reviews at random from the generated sample; the fixed random_state
# makes the draw repeatable.
human_eval_size = 100
df_human_eval = df.sample(n=human_eval_size, random_state=42)

# Write the drawn subset to a CSV file without the row index.
df_human_eval.to_csv('GPT2_human_evaluation.csv', index=False)

In [20]:
# Display the sample drawn for the human evaluation.
df_human_eval

Unnamed: 0,Prompt,Generated Review,Perplexity,Mean Perplexity
6252,I purchased this T-shirt on Amazon recently. Here's my experience so far.,"I will buy my entire one on line. The quality is great, just the right amount. It's not too thick and doesn't slide off your wrist when you're sitting. I highly recommend it to anyone who likes great products. Just note that this is a very poor quality garment.",19.856007,12.965813
4684,I purchased this T-shirt on Amazon recently. Here's my experience so far.,"This thing is that it's made out of a plastic plastic, stiff, like flannel and the threads are tearing. There are seams hanging out all over the place where they should be sewn on, and the sleeves are sewn in an odd shape. The sleeves aren't even sewn on well, and the quality is lacking. For $11, I don't think this would have happened.",12.631145,12.965813
1731,I purchased this T-shirt on Amazon recently. Here's my experience so far.,"After one use, the jacket is a little short. It's great to hang around but it's a good sleeve length which does not stay in place. Would definitely buy more of these. Highly recommend for the office!I would recommend. I'll give it 5 stars as it's adorable.",25.269028,12.965813
4742,I purchased this T-shirt on Amazon recently. Here's my experience so far.,"We are the RIPPING. We bought this for our 2 year old to go back to school. I wanted it to be more loose, but not so much. The material is very thin and not soft and very thin, although a 2 year old's bathrobe is very soft on one end, he has to tuck the lanyard on. We have a feeling this top would be helpful for a child to wear when he starts his arms, which I was not happy. It doesn't keep my son warm for longer than some, especially since the fabric was thin and he just kept cold weather all day, and it was very warm. I recommend this seller.",19.150295,12.965813
4521,I purchased this T-shirt on Amazon recently. Here's my experience so far.,"The shirt is absolutely adorable. It's not too much of a cosplay shirt, but it's a great quality and fits great. I wish I could choose from these companies. They are a great buy. The quality is really great. I'd definitely buy again.I've really liked the way they look.",12.928615,12.965813
...,...,...,...,...
3787,I purchased this T-shirt on Amazon recently. Here's my experience so far.,"It's very well made and I have a lot of trouble putting my clothes on it. It's not as warm as I hoped. It does not take any getting in, however, I have very sensitive skin and I love the material. I had hoped for a better product based on reviews, but I was concerned about the fit or sizing. I went ahead and ordered a XL. (I normally wear L or M/XL) and it fit perfect. It is snug but I can easily move in it and still be comfortable wearing it.",9.645207,12.965813
9189,I purchased this T-shirt on Amazon recently. Here's my experience so far.,I am very pleased with the product. Thank you and so much. Thank you! Be ready to order more in different colors. I have received many compliments on my shirt. This shirt is perfect for travel and to the beach. Will definitely buy another one!,11.125388,12.965813
7825,I purchased this T-shirt on Amazon recently. Here's my experience so far.,"This suit was my new favorite workout bra. I ordered the black and black. To start...I am 5'8"" and the s/m/m on me is too small (I've worn it with it and I can't even tried the bra on). The top is fine. It was just not meant for my body type.",16.747879,12.965813
7539,I purchased this T-shirt on Amazon recently. Here's my experience so far.,I ordered the size that's recommended to order your normal size. It was not small at all... I'm a size 8-12. I ordered the size Medium because I wanted to have a snug fit. Unfortunately I ended up returning it because I loved the color! It's not really a bad shirt. I'm a medium and hope they make the right size.,11.672304,12.965813
