In [1]:
import tensorflow as tf
from transformers import TFRobertaForSequenceClassification, RobertaTokenizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd


In [2]:
!pip install transformers


Collecting transformers
  Downloading transformers-4.31.0-py3-none-any.whl (7.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m30.4 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m79.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m75.1 MB/s[0m eta [36m0:00:0

# Loading data

In [2]:
data_fake_reviews = pd.read_csv('./fake reviews dataset.csv')

In [3]:
data_fake_reviews.shape

(40432, 4)

In [4]:
data_fake_reviews.head()

Unnamed: 0,category,rating,label,text_
0,Home_and_Kitchen_5,5.0,CG,"Love this! Well made, sturdy, and very comfor..."
1,Home_and_Kitchen_5,5.0,CG,"love it, a great upgrade from the original. I..."
2,Home_and_Kitchen_5,5.0,CG,This pillow saved my back. I love the look and...
3,Home_and_Kitchen_5,1.0,CG,"Missing information on how to use it, but it i..."
4,Home_and_Kitchen_5,5.0,CG,Very nice set. Good quality. We have had the s...


In [5]:
data_fake_reviews.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40432 entries, 0 to 40431
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   category  40432 non-null  object 
 1   rating    40432 non-null  float64
 2   label     40432 non-null  object 
 3   text_     40432 non-null  object 
dtypes: float64(1), object(3)
memory usage: 1.2+ MB


In [6]:
data_fake_reviews.label.unique()

array(['CG', 'OR'], dtype=object)

In [7]:
data_fake_reviews.columns

Index(['category', 'rating', 'label', 'text_'], dtype='object')

# Obtain the training dataset and validation dataset

In [8]:
dict_target = {'CG':0, 'OR':1}
def encode_label(x):
    return dict_target.get(x,-1)

In [9]:
data_fake_reviews["target"] = data_fake_reviews["label"].apply(lambda x: encode_label(x))

In [10]:
# Split the data into training and validation sets
train_dataset, tv_dataset = train_test_split(data_fake_reviews, test_size=0.4, shuffle=True, stratify=None, random_state=2021)

In [11]:
test_dataset_raw, valid_dataset = train_test_split(tv_dataset, test_size=0.5, shuffle=True, stratify=None, random_state=2021)

In [12]:
# Load the tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

In [13]:
# Tokenize the data
train_encodings = tokenizer(train_dataset['text_'].tolist(), truncation=True, padding=True, max_length=256)
valid_encodings = tokenizer(valid_dataset['text_'].tolist(), truncation=True, padding=True, max_length=256)
test_encodings = tokenizer(test_dataset_raw['text_'].tolist(), truncation=True, padding=True, max_length=256)

In [15]:
# Convert the data to TensorFlow tensors
train_dataset = tf.data.Dataset.from_tensor_slices((
    dict(train_encodings),
    train_dataset['target'].values
))
valid_dataset = tf.data.Dataset.from_tensor_slices((
    dict(valid_encodings),
    valid_dataset['target'].values
))
test_dataset = tf.data.Dataset.from_tensor_slices((
    dict(test_encodings),
    test_dataset_raw['target'].values
))

# Load the model

In [16]:
# Load the model
model = TFRobertaForSequenceClassification.from_pretrained('roberta-base')



Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFRobertaForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predicti

In [17]:

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [18]:

# Train the model
model.fit(train_dataset.shuffle(1000).batch(16), epochs=1, batch_size=16, validation_data=valid_dataset.batch(16))



<keras.callbacks.History at 0x7ddbf12af8b0>

# evaluate the performance

In [19]:
from sklearn.metrics import classification_report
import numpy as np
# Make predictions on the validation dataset
test_dataset_batched = test_dataset.batch(16)
y_pred_raw = model.predict(test_dataset_batched)

# Convert raw predictions to class predictions
y_pred = np.argmax(y_pred_raw.logits, axis=1)

# Get the actual class labels
y_true = []
for features, label in test_dataset:
    y_true.append(label.numpy())
y_true = np.array(y_true)


# Generate a classification report
report = classification_report(y_true, y_pred, target_names=["CG", "OR"])  # replace target_names with your actual class names
print(report)


              precision    recall  f1-score   support

          CG       0.93      1.00      0.96      3974
          OR       1.00      0.93      0.96      4112

    accuracy                           0.96      8086
   macro avg       0.96      0.96      0.96      8086
weighted avg       0.96      0.96      0.96      8086



In [None]:
model.save('my_model_fake_reviews')





# Test dataset with 150


In [21]:
test_dataset_raw.shape

(8086, 5)

In [22]:
test_dataset_raw.head()

Unnamed: 0,category,rating,label,text_,target
31179,Books_5,5.0,CG,Rick Bragg is one of the most entertaining cha...,0
26353,Kindle_Store_5,5.0,CG,Ms. Rasley's prose never fails to capture the ...,0
9403,Electronics_5,5.0,OR,Rebuilt my PC from the ground up and I absolut...,1
13753,Movies_and_TV_5,5.0,CG,An awesome show! Got a few of the movies on dv...,0
19789,Pet_Supplies_5,5.0,OR,"Highly recommend this brand of milk replacer, ...",1


In [23]:
test_dataset_raw_0 = test_dataset_raw[test_dataset_raw['target']==0]

In [24]:
test_dataset_raw_0.shape

(3974, 5)

In [25]:
random_50_rows = test_dataset_raw_0.sample(n=50)

# Build the new DataFrame using the randomly selected rows
test_dataset_raw_0_50 = pd.DataFrame(random_50_rows)

In [28]:
test_dataset_raw_0_50.head()

Unnamed: 0,category,rating,label,text_,target
19080,Tools_and_Home_Improvement_5,5.0,CG,I really like the Skeletool. I have an old Sk...,0
37380,Clothing_Shoes_and_Jewelry_5,5.0,CG,Well made belt. High quality material. The onl...,0
6466,Sports_and_Outdoors_5,5.0,CG,"Perfect for 18"" inch dolls. The only problem i...",0
3934,Home_and_Kitchen_5,3.0,CG,This Martha Stewart pom-pom flower pot. It's ...,0
33275,Toys_and_Games_5,5.0,CG,I can't say enough good things about this prod...,0


In [29]:
test_dataset_raw_1 = test_dataset_raw[test_dataset_raw['target']==1]

In [30]:
random_1_rows = test_dataset_raw_1.sample(n=100)

# Build the new DataFrame using the randomly selected rows
test_dataset_raw_1_100 = pd.DataFrame(random_1_rows)

In [31]:
test_dataset_raw_1_100.shape

(100, 5)

In [32]:
merged_df = pd.concat([test_dataset_raw_0_50, test_dataset_raw_1_100], ignore_index=True)

# Shuffle the index
test_150_df = merged_df.sample(frac=1).reset_index(drop=True)

In [33]:
test_150_df

Unnamed: 0,category,rating,label,text_,target
0,Electronics_5,5.0,OR,I am amazed. These are light and comfortable....,1
1,Movies_and_TV_5,5.0,OR,This is one of my favorite movies. It's a cat...,1
2,Toys_and_Games_5,5.0,CG,"This deck is a good starter set, with lots of ...",0
3,Pet_Supplies_5,4.0,OR,"Oh, gosh, I can't believe it's taken me this l...",1
4,Toys_and_Games_5,5.0,OR,great for the price even came in a back pack t...,1
...,...,...,...,...,...
145,Toys_and_Games_5,5.0,CG,We purchased this camera for my daughter and s...,0
146,Electronics_5,5.0,OR,I purchased one of these to replace an error p...,1
147,Kindle_Store_5,5.0,OR,"I picked this up, expecting just another short...",1
148,Pet_Supplies_5,4.0,OR,My dog loves this toy and is always chewing on...,1


In [34]:
test_encodings_150 = tokenizer(test_150_df['text_'].tolist(), truncation=True, padding=True, max_length=256)

test_dataset_150 = tf.data.Dataset.from_tensor_slices((
    dict(test_encodings_150),
    test_150_df['target'].values
))



In [35]:
test_dataset_150_batched = test_dataset_150.batch(16)
y_pred_raw_150 = model.predict(test_dataset_150_batched)

# Convert raw predictions to class predictions
y_pred_150 = np.argmax(y_pred_raw_150.logits, axis=1)

# Get the actual class labels
y_true_150 = []
for features, label in test_dataset_150:
    y_true_150.append(label.numpy())
y_true_150 = np.array(y_true_150)


# Generate a classification report
report_150 = classification_report(y_true_150, y_pred_150, target_names=["CG", "OR"])  # replace target_names with your actual class names
print(report_150)

              precision    recall  f1-score   support

          CG       0.91      0.98      0.94        50
          OR       0.99      0.95      0.97       100

    accuracy                           0.96       150
   macro avg       0.95      0.96      0.96       150
weighted avg       0.96      0.96      0.96       150



# chatgpt

In [39]:
test_dataset_raw_0_50.index

Int64Index([19080, 37380,  6466,  3934, 33275, 24286, 11892, 36412,  5625,
            25570,  8501, 20411, 28867, 12409, 33851, 34246, 39476, 13831,
            24371,  9662, 33463,  2954, 36835, 11507, 17802, 28244, 10309,
            31786, 26381,  3940, 12791, 21130, 26333, 34131, 28036, 21829,
            36002, 32896, 13185,  4173,  6265, 36584, 18300, 36500,  8968,
            32790, 26048, 10131, 28084,  6407],
           dtype='int64')

In [140]:
test_dataset_raw_0_50['text_'][6407]

"I now want these in a larger size. I've also used them for a few hours in a cave. The seams are solid and they have a nice feel to them. I have had no issues with the seams becoming loose."

In [40]:
test_dataset_raw_0_50['text_'][19080] = "I really admire the Skeletool; it's the first saw I've ever owned, and while it may be a bit pricey, it's definitely worth the investment. The clean cuts it delivers are impressive, and I find myself using it on numerous projects. Recently, I decided to purchase a new one and used it to cut with a saw I've had for about a year now. The Skeletool's build quality is excellent, but it is quite heavy, which can be a bit challenging on my workbench. However, I've still grown fond of it. One minor issue I've encountered is that it doesn't work as effectively when pushing the handle back against the base. Nonetheless, I'm generally satisfied with its performance and reliability."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][19080] = "I really admire the Skeletool; it's the first saw I've ever owned, and while it may be a bit pricey, it's definitely worth the investment. The clean cuts it delivers are impressive, and I find myself using it on numerous projects. Recently, I decided to purchase a new one and used it to cut with a saw I've had for about a year now. The Skeletool's build quality is excellent, but it is quite heavy, which can be a bit challenging on my workbench. However, I've still grown fond of it. One minor issue I've encountered is that it doesn't work as effectively when pushing the handle back against the base. Nonetheless, I'm generally satisfied with its performance and reliability."


In [42]:
test_dataset_raw_0_50['text_'][37380] ="The belt is crafted with excellent craftsmanship and made from high-quality materials. The only drawback I've noticed is that it comes with a relatively small buckle."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][37380] ="The belt is crafted with excellent craftsmanship and made from high-quality materials. The only drawback I've noticed is that it comes with a relatively small buckle."


In [45]:
test_dataset_raw_0_50['text_'][6466] ="This item is an ideal fit for 18-inch dolls. However, there's a minor issue with a small screw that comes with it, as it tends to be overly tight. Additionally, the clamp that holds the doll may be a bit loose, although this shouldn't pose a problem for the majority of dolls."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][6466] ="This item is an ideal fit for 18-inch dolls. However, there's a minor issue with a small screw that comes with it, as it tends to be overly tight. Additionally, the clamp that holds the doll may be a bit loose, although this shouldn't pose a problem for the majority of dolls."


In [47]:
test_dataset_raw_0_50['text_'][3934] = "I purchased the Martha Stewart pom-pom flower pot, and it fits perfectly in my kitchen, adding a lovely touch to the decor. The top features a durable stainless steel ring with a sturdy ceramic edge, making it a visually appealing piece. Although it might not be the most budget-friendly option, I find it to be a worthwhile addition to my kitchen. I received a free sample of this product at a discounted price in exchange for providing my honest and unbiased review, and I couldn't be happier with the purchase. Having used this product, I can confidently say that it has become an essential item in my kitchen. I'm delighted to have received it at a discount in exchange for my honest and unbiased review, and I genuinely believe it has been a valuable addition to my home"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][3934] = "I purchased the Martha Stewart pom-pom flower pot, and it fits perfectly in my kitchen, adding a lovely touch to the decor. The top features a durable stainless steel ring with a sturdy ceramic edge, making it a visually appealing piece. Although it might not be the most budget-friendly option, I find it to be a worthwhile addition to my kitchen. I received a free sample of this product at a discounted price in exchange for providing my honest and unbiased review, and I couldn't be happier with the purchase. Having used this product, I can confidently say that it has become an essential item in my kitchen. I'm delighted to have received it at a discount in exchange for my honest and unbiased review, and I genuinely believe it has been a valua

In [49]:
test_dataset_raw_0_50['text_'][33275] = "I cannot express how much I appreciate this product, and it brings me immense joy to see how much my daughter loves it."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][33275] = "I cannot express how much I appreciate this product, and it brings me immense joy to see how much my daughter loves it."


In [51]:
test_dataset_raw_0_50['text_'][24286] ="It's just perfect for that special time of year when her sister returns for a new year, and we get a glimpse of their joy and excitement."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][24286] ="It's just perfect for that special time of year when her sister returns for a new year, and we get a glimpse of their joy and excitement."


In [53]:
test_dataset_raw_0_50['text_'][11892] ="When using the device with my computer running Windows XP and Windows 7, I found that it connected easily without direct computer connection, requiring only a USB cable. However, when attempting to connect it to my Windows 8.1 computer with an old hard drive, I encountered some confusion and had to take a few extra steps to establish a stable connection. Overall, the device worked well with Windows XP and Windows 7, but required additional attention for proper connectivity with the Windows 8.1 computer and old hard drive setup."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][11892] ="When using the device with my computer running Windows XP and Windows 7, I found that it connected easily without direct computer connection, requiring only a USB cable. However, when attempting to connect it to my Windows 8.1 computer with an old hard drive, I encountered some confusion and had to take a few extra steps to establish a stable connection. Overall, the device worked well with Windows XP and Windows 7, but required additional attention for proper connectivity with the Windows 8.1 computer and old hard drive setup."


In [55]:
test_dataset_raw_0_50['text_'][36412] ="My four-year-old son (age 3) absolutely adores these toys, and we also have a smaller version for the baby. He loves the sounds the animals make, and it's a fantastic toy that keeps him entertained. I highly recommend it. It was a gift for my niece, and she loves it too! Getting it as a free item made it even better. The set is the perfect size and looks wonderful. Assembling it is easy, and my son enjoys playing with it. The inclusion of two small pieces, each with a lid, is a nice feature. We gave this as a Christmas gift to my son, and he adores it; it's simply adorable. This train set is fantastic, and our kids enjoy playing with it. We got it for our 6-year-old, and it's a hit. Even our 5-year-old loves it. The train is about 18 tall and weighs approximately 8 lbs, making it suitable for younger children. Furthermore, it's a sturdy and durable toy, perfect for young babies, and the small plastic cup holder is a convenient addition."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][36412] ="My four-year-old son (age 3) absolutely adores these toys, and we also have a smaller version for the baby. He loves the sounds the animals make, and it's a fantastic toy that keeps him entertained. I highly recommend it. It was a gift for my niece, and she loves it too! Getting it as a free item made it even better. The set is the perfect size and looks wonderful. Assembling it is easy, and my son enjoys playing with it. The inclusion of two small pieces, each with a lid, is a nice feature. We gave this as a Christmas gift to my son, and he adores it; it's simply adorable. This train set is fantastic, and our kids enjoy playing with it. We got it for our 6-year-old, and it's a hit. Even our 5-year-old loves it. The train is about 18 tall and

In [58]:
test_dataset_raw_0_50['text_'][5625]  = "The perfect powder measure lives up to its name as it functions flawlessly. However, the only issue I encountered was with a small screw that was a bit too tight. Despite that, I'm still using it with great satisfaction."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][5625]  = "The perfect powder measure lives up to its name as it functions flawlessly. However, the only issue I encountered was with a small screw that was a bit too tight. Despite that, I'm still using it with great satisfaction."


In [60]:
test_dataset_raw_0_50['text_'][25570] = "I found great pleasure in reading this book as it beautifully combined chemistry and humor in a love story. However, I couldn't help but feel a bit melancholic when the story concluded. That might be the reason why I rated it 4 stars. The author's storytelling had me captivated throughout the entire journey."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][25570] = "I found great pleasure in reading this book as it beautifully combined chemistry and humor in a love story. However, I couldn't help but feel a bit melancholic when the story concluded. That might be the reason why I rated it 4 stars. The author's storytelling had me captivated throughout the entire journey."


In [62]:
test_dataset_raw_0_50['text_'][8501] = "Two words: heavy-duty! I use it daily with my phone, and it functions exceptionally well."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][8501] = "Two words: heavy-duty! I use it daily with my phone, and it functions exceptionally well."


In [64]:
test_dataset_raw_0_50['text_'][20411] ="It works wonderfully and has a pleasant scent. In fact, it's the only product I will purchase for my cats!"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][20411] ="It works wonderfully and has a pleasant scent. In fact, it's the only product I will purchase for my cats!"


In [66]:
test_dataset_raw_0_50['text_'][28867] = "The book is highly recommended for anyone looking for a clear and easily understandable format, along with straightforward examples that are easy to follow."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][28867] = "The book is highly recommended for anyone looking for a clear and easily understandable format, along with straightforward examples that are easy to follow."


In [68]:
test_dataset_raw_0_50['text_'][12409] ="This does remind me of 'Colusses: The World's Towards the Div', but I'm not sure what you want me to rephrase. Could you please provide more context or clarify your request?"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][12409] ="This does remind me of 'Colusses: The World's Towards the Div', but I'm not sure what you want me to rephrase. Could you please provide more context or clarify your request?"


In [70]:
test_dataset_raw_0_50['text_'][33851] ="This doll is absolutely adorable! She is incredibly soft and cuddly, and the baby's touch is gentle and tender."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][33851] ="This doll is absolutely adorable! She is incredibly soft and cuddly, and the baby's touch is gentle and tender."


In [72]:
test_dataset_raw_0_50['text_'][34246] ="I wish this item could have been slightly more expensive. While I'm unsure if I would have bought it from Amazon, I am contemplating my decision."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][34246] ="I wish this item could have been slightly more expensive. While I'm unsure if I would have bought it from Amazon, I am contemplating my decision."


In [74]:
test_dataset_raw_0_50['text_'][39476] = "These shoes are incredibly fashionable and stylish, making anyone wearing them look well-dressed. I purchased them for my son, and he absolutely loves them! He wears them frequently, and they provide great comfort. However, it's important to note that these are not waterproof boots, so their longevity might be limited in certain conditions. Overall, they are a fantastic shoe choice."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][39476] = "These shoes are incredibly fashionable and stylish, making anyone wearing them look well-dressed. I purchased them for my son, and he absolutely loves them! He wears them frequently, and they provide great comfort. However, it's important to note that these are not waterproof boots, so their longevity might be limited in certain conditions. Overall, they are a fantastic shoe choice."


In [76]:
test_dataset_raw_0_50['text_'][13831] = "This film is truly remarkable as a Christian movie. The storytelling is well-crafted, and the acting is of excellent quality. I highly recommend this film to anyone who appreciates a compelling Christian story. As a movie enthusiast, I must say this one stands out as a great choice."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][13831] = "This film is truly remarkable as a Christian movie. The storytelling is well-crafted, and the acting is of excellent quality. I highly recommend this film to anyone who appreciates a compelling Christian story. As a movie enthusiast, I must say this one stands out as a great choice."


In [78]:
test_dataset_raw_0_50['text_'][24371] = "I derive immense pleasure from reading this series. The characters are skillfully developed, and the narrative is masterfully told. The story is truly captivating and engrossing"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][24371] = "I derive immense pleasure from reading this series. The characters are skillfully developed, and the narrative is masterfully told. The story is truly captivating and engrossing"


In [80]:
test_dataset_raw_0_50['text_'][9662] ="This is another outstanding addition to an already impressive collection, offering excellent value for the money spent. While I may not be an expert on music, as a passionate music lover, I can confidently say that this is a remarkable choice."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][9662] ="This is another outstanding addition to an already impressive collection, offering excellent value for the money spent. While I may not be an expert on music, as a passionate music lover, I can confidently say that this is a remarkable choice."


In [82]:
test_dataset_raw_0_50['text_'][33463] = "Our 3-year-old daughter thoroughly enjoys playing with this toy, especially in her playroom. It's a delightful addition to her playtime."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][33463] = "Our 3-year-old daughter thoroughly enjoys playing with this toy, especially in her playroom. It's a delightful addition to her playtime."


In [84]:
test_dataset_raw_0_50['text_'][2954] = "This product is generously sized, providing a comfortable and pleasant feel in my hand. I purchased it as a gift for a friend who turned out to be a big fan of it. She loved it so much that she even sent one to her brother as a Christmas present. The positive reviews mentioning its excellent quality were indeed accurate, and I can vouch for its impressive build."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][2954] = "This product is generously sized, providing a comfortable and pleasant feel in my hand. I purchased it as a gift for a friend who turned out to be a big fan of it. She loved it so much that she even sent one to her brother as a Christmas present. The positive reviews mentioning its excellent quality were indeed accurate, and I can vouch for its impressive build."


In [86]:
test_dataset_raw_0_50['text_'][36835] = "Highly comfortable and pleasantly designed, I wholeheartedly recommend this product. Its quality is superb, and it offers a comfortable fit, even for those with wide feet."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][36835] = "Highly comfortable and pleasantly designed, I wholeheartedly recommend this product. Its quality is superb, and it offers a comfortable fit, even for those with wide feet."


In [88]:
test_dataset_raw_0_50['text_'][11507] = "After enduring several months of delays, this product has finally become a reality. I rate it 4 stars only because it's slightly too small for a laptop. The keyboard size is a bit compact, which might not be a major issue for me, but it could be better suited for laptops. Additionally, the keyboard is a bit large and cumbersome to handle. Personally, I prefer having my laptop on my lap or on the couch rather than dealing with a bulky keyboard on my lap with a hard surface. There are some doubts about the keyboard's longevity, as it may be too small for a laptop. However, it does a commendable job of securely holding my Macbook Pro, and I've been using it comfortably for some time now."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][11507] = "After enduring several months of delays, this product has finally become a reality. I rate it 4 stars only because it's slightly too small for a laptop. The keyboard size is a bit compact, which might not be a major issue for me, but it could be better suited for laptops. Additionally, the keyboard is a bit large and cumbersome to handle. Personally, I prefer having my laptop on my lap or on the couch rather than dealing with a bulky keyboard on my lap with a hard surface. There are some doubts about the keyboard's longevity, as it may be too small for a laptop. However, it does a commendable job of securely holding my Macbook Pro, and I've been using it comfortably for some time now."


In [90]:
test_dataset_raw_0_50['text_'][17802] = "I've owned this gun for a couple of years, and I couldn't be happier with its performance. Despite its small size, it packs a punch and proves its worth without the need for spending extra money. It's particularly useful for handling small jobs and comes in handy for tasks, especially when on the go or working in a truck."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][17802] = "I've owned this gun for a couple of years, and I couldn't be happier with its performance. Despite its small size, it packs a punch and proves its worth without the need for spending extra money. It's particularly useful for handling small jobs and comes in handy for tasks, especially when on the go or working in a truck."


In [92]:
test_dataset_raw_0_50['text_'][28244] = "I received this book as an Amazon freebie and got an ARC in exchange for an honest review. While the first two books were enjoyable, the rest of the story didn't quite captivate me, especially the second book, which lacked a compelling storyline. The hero and heroine were a bit frustrating, and their characters lacked depth, although the hero was likable and the heroine's growth was interesting. The villain was well-portrayed, but didn't evoke much emotion. The ending was pleasant, and I had hoped for a sequel that didn't materialize. Overall, the book had some enjoyable aspects with twists and turns, making it worth recommending to those who appreciate a good story."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][28244] = "I received this book as an Amazon freebie and got an ARC in exchange for an honest review. While the first two books were enjoyable, the rest of the story didn't quite captivate me, especially the second book, which lacked a compelling storyline. The hero and heroine were a bit frustrating, and their characters lacked depth, although the hero was likable and the heroine's growth was interesting. The villain was well-portrayed, but didn't evoke much emotion. The ending was pleasant, and I had hoped for a sequel that didn't materialize. Overall, the book had some enjoyable aspects with twists and turns, making it worth recommending to those who appreciate a good story."


In [94]:
test_dataset_raw_0_50['text_'][10309] ="The device serves its purpose, but it does have a design flaw. I must admit that it's a bit bulky. If you're looking for a nice and sturdy desktop, then this is the one to get. However, if you prefer a smaller desktop, then this might not be the best choice for you. I purchased this device for..."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][10309] ="The device serves its purpose, but it does have a design flaw. I must admit that it's a bit bulky. If you're looking for a nice and sturdy desktop, then this is the one to get. However, if you prefer a smaller desktop, then this might not be the best choice for you. I purchased this device for..."


In [96]:
test_dataset_raw_0_50['text_'][31786] ="It's unfortunate that this book has been overlooked in history, but dwelling on the past isn't worth my time. However, I must acknowledge that the book is well-written and in good condition. On the brighter side, I absolutely adored this book and couldn't wait to read it, along with its thrilling sequel. It seamlessly continues the series, and I'm eagerly looking forward to what unfolds next. This entire series has been a delightful journey for me over the past month, and each book has been a joy to read."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][31786] ="It's unfortunate that this book has been overlooked in history, but dwelling on the past isn't worth my time. However, I must acknowledge that the book is well-written and in good condition. On the brighter side, I absolutely adored this book and couldn't wait to read it, along with its thrilling sequel. It seamlessly continues the series, and I'm eagerly looking forward to what unfolds next. This entire series has been a delightful journey for me over the past month, and each book has been a joy to read."


In [98]:
test_dataset_raw_0_50['text_'][26381] = "I absolutely adored this book! The emotional love story kept me captivated, and I loved the characters' journey to find their way back together. It's a must-read for all romance enthusiasts. The story was very engaging, and the characters were well-developed, which I truly appreciated."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][26381] = "I absolutely adored this book! The emotional love story kept me captivated, and I loved the characters' journey to find their way back together. It's a must-read for all romance enthusiasts. The story was very engaging, and the characters were well-developed, which I truly appreciated."


In [100]:
test_dataset_raw_0_50['text_'][3940] ="I have two Keurig-type vacuum cleaners, and they work perfectly fine. The extra suction on the top is a great feature, and the blades make it easy to clean. I would definitely repurchase this product./The reviews mentioned that it was a great size, but I found it to be smaller than expected, so I had to order another set for my grandson. Nevertheless, I love the color, and they work wonderfully for me. /This blanket is fantastic! It's soft, cozy, and durable. While it's not overly heavy, it does a good job of keeping me comfortable in hot weather. I'll be buying another set for my daughter, who is having trouble sleeping. /I bought these curtains, and they looked pretty cute. They are also great for my son's bed. The colors are exactly as pictured, and the fabric is softer than expected, but I still like it. The vibrant colors are a pleasant surprise. They are a wonderful addition to my bedroom, and I highly recommend them."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][3940] ="I have two Keurig-type vacuum cleaners, and they work perfectly fine. The extra suction on the top is a great feature, and the blades make it easy to clean. I would definitely repurchase this product./The reviews mentioned that it was a great size, but I found it to be smaller than expected, so I had to order another set for my grandson. Nevertheless, I love the color, and they work wonderfully for me. /This blanket is fantastic! It's soft, cozy, and durable. While it's not overly heavy, it does a good job of keeping me comfortable in hot weather. I'll be buying another set for my daughter, who is having trouble sleeping. /I bought these curtains, and they looked pretty cute. They are also great for my son's bed. The colors are exactly as pict

In [102]:
test_dataset_raw_0_50['text_'][12791] = "This animated movie was fantastic! Initially, I didn't expect it to be as good as the original, but it pleasantly surprised me with its greatness."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][12791] = "This animated movie was fantastic! Initially, I didn't expect it to be as good as the original, but it pleasantly surprised me with its greatness."


In [104]:
test_dataset_raw_0_50['text_'][21130] = "My cat absolutely loves this toy, and it keeps her entertained for a considerable amount of time. She doesn't chase it, but it still proves to be an excellent value for the price."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][21130] = "My cat absolutely loves this toy, and it keeps her entertained for a considerable amount of time. She doesn't chase it, but it still proves to be an excellent value for the price."


In [107]:
test_dataset_raw_0_50['text_'][26333] = "Calling it 'broken' would be an understatement of the thrilling journey this story takes you on. Filled with captivating twists and turns, it keeps you hooked and eager for more. The characters are truly believable, and the fast-paced narrative adds to its appeal. Without a doubt, this book is a must-read, and I highly recommend it. I received this book from a friend, and in all honesty, I absolutely loved it!"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][26333] = "Calling it 'broken' would be an understatement of the thrilling journey this story takes you on. Filled with captivating twists and turns, it keeps you hooked and eager for more. The characters are truly believable, and the fast-paced narrative adds to its appeal. Without a doubt, this book is a must-read, and I highly recommend it. I received this book from a friend, and in all honesty, I absolutely loved it!"


In [109]:
test_dataset_raw_0_50['text_'][34131] = "This deck serves as an excellent starter set, featuring numerous high-quality cards. The variety among the cards is impressive, and the design is well-crafted."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][34131] = "This deck serves as an excellent starter set, featuring numerous high-quality cards. The variety among the cards is impressive, and the design is well-crafted."


In [111]:
test_dataset_raw_0_50['text_'][28036] ="Slight spoiler alert: Killian is portrayed as the male lead with a troubled past that has made him come across as a bit of a jerk. His desire to avoid involvement with the mercenary organization he was hired to protect was the only thing holding him back from making a decision. He had much to learn about the world and military, and it was a challenging lesson to grasp. Throughout the experience, he underwent significant personal growth, but it was hard to see him as anything other than a jerk.On the other hand, Kylo, a former Marine, is a young woman with a military background. Although she wasn't in a position to tell Killian how to behave, she was committed to protecting him. While she also had much to learn about her family and their opinions, she was determined to do everything in her power to keep him safe and make him happy. At the same time, Kylo felt that there was something about herself that would profoundly change her life for the better. The last thing she wanted was to..."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][28036] ="Slight spoiler alert: Killian is portrayed as the male lead with a troubled past that has made him come across as a bit of a jerk. His desire to avoid involvement with the mercenary organization he was hired to protect was the only thing holding him back from making a decision. He had much to learn about the world and military, and it was a challenging lesson to grasp. Throughout the experience, he underwent significant personal growth, but it was hard to see him as anything other than a jerk.On the other hand, Kylo, a former Marine, is a young woman with a military background. Although she wasn't in a position to tell Killian how to behave, she was committed to protecting him. While she also had much to learn about her family and their opini

In [113]:
test_dataset_raw_0_50['text_'][21829] ="This product is fantastic for taking my two small dogs outside with me. I plan to keep using it for the next couple of months due to its great functionality and the excellent price. It works perfectly, especially since my dog is a chewer, and I find it easy to get her to eat with it."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][21829] ="This product is fantastic for taking my two small dogs outside with me. I plan to keep using it for the next couple of months due to its great functionality and the excellent price. It works perfectly, especially since my dog is a chewer, and I find it easy to get her to eat with it."


In [115]:
test_dataset_raw_0_50['text_'][36002] ="We bought this camera for my daughter, and she absolutely loves it, especially the bright colors. She plays with it frequently, and it's easy for her to use. The camera has been perfect for her as she enjoys taking pictures of everything she can, including her family's toys and other items. The camera's quality is impressive, and it's very durable and user-friendly. I highly recommend this camera for any child interested in video. It's an excellent choice, and I would recommend it to anyone who is interested."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][36002] ="We bought this camera for my daughter, and she absolutely loves it, especially the bright colors. She plays with it frequently, and it's easy for her to use. The camera has been perfect for her as she enjoys taking pictures of everything she can, including her family's toys and other items. The camera's quality is impressive, and it's very durable and user-friendly. I highly recommend this camera for any child interested in video. It's an excellent choice, and I would recommend it to anyone who is interested."


In [117]:
test_dataset_raw_0_50['text_'][32896] ="This item fits my 20-month-old perfectly and includes all the necessary pieces to make it function."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][32896] ="This item fits my 20-month-old perfectly and includes all the necessary pieces to make it function."


In [119]:
test_dataset_raw_0_50['text_'][13185] ="I haven't watched the entire movie yet, and I won't be purchasing the Blu-ray. Instead, I had to obtain a copy in some other way."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][13185] ="I haven't watched the entire movie yet, and I won't be purchasing the Blu-ray. Instead, I had to obtain a copy in some other way."


In [121]:
test_dataset_raw_0_50['text_'][4173] ="I purchased this as a gift for my husband, and he absolutely adores it. He loves the way it..."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][4173] ="I purchased this as a gift for my husband, and he absolutely adores it. He loves the way it..."


In [123]:
test_dataset_raw_0_50['text_'][6265] ="While I typically shoot multiple weapons, I found it a bit challenging to acquire this particular one. However, I managed to get a solid grip on the gun, making it suitable for use with a Ruger 1911."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][6265] ="While I typically shoot multiple weapons, I found it a bit challenging to acquire this particular one. However, I managed to get a solid grip on the gun, making it suitable for use with a Ruger 1911."


In [125]:
test_dataset_raw_0_50['text_'][36584] ="My granddaughter adores it. I purchased boxer briefs for my daughter, and she was thrilled with them."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][36584] ="My granddaughter adores it. I purchased boxer briefs for my daughter, and she was thrilled with them."


In [127]:
test_dataset_raw_0_50['text_'][18300] ="I'm giving this product 4 stars because it's a cost-effective light that is suitable for various tasks.I previously purchased a similar light from Home Depot that was advertised as a 4-watt light, but it turned out to be a 5-watt light with no sun and a 2-watt dimmer. In contrast, the one I have now is genuinely a 4-watt light."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][18300] ="I'm giving this product 4 stars because it's a cost-effective light that is suitable for various tasks.I previously purchased a similar light from Home Depot that was advertised as a 4-watt light, but it turned out to be a 5-watt light with no sun and a 2-watt dimmer. In contrast, the one I have now is genuinely a 4-watt light."


In [129]:
test_dataset_raw_0_50['text_'][36500] ="I've always been a Star Wars fan, but I was pleasantly surprised by how well-made this particular item is. The attention to detail is fantastic, and the paintwork is exquisite. Despite my initial expectation of a smaller figure, I found it to be perfect in size. It's still enjoyable to play with and is a must-have for any Star Wars enthusiast.I bought this as a gift for my son, and he was genuinely interested in it. The toy is very cute, and he loved it. I'm looking forward to getting more items like this for him, knowing it will make him happy. Great product! My granddaughter adores these bath toys, but the kids prefer not to leave them outside for too long. I got them for my 9-year-old daughter on her birthday, and she loves playing with them, although she found the small pieces a bit challenging to handle. My granddaughter loved this doll, even though she's not generally into dolls. She was initially a little apprehensive about the wooden material, but she found the doll to be very cute. However, I felt it was a bit more expensive than expected, and the dress was too small and loose at the back. The dolls are also cheaply made and challenging to put on the doll's back. Despite these shortcomings, it's a fun toy overall."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][36500] ="I've always been a Star Wars fan, but I was pleasantly surprised by how well-made this particular item is. The attention to detail is fantastic, and the paintwork is exquisite. Despite my initial expectation of a smaller figure, I found it to be perfect in size. It's still enjoyable to play with and is a must-have for any Star Wars enthusiast.I bought this as a gift for my son, and he was genuinely interested in it. The toy is very cute, and he loved it. I'm looking forward to getting more items like this for him, knowing it will make him happy. Great product! My granddaughter adores these bath toys, but the kids prefer not to leave them outside for too long. I got them for my 9-year-old daughter on her birthday, and she loves playing with th

In [131]:
test_dataset_raw_0_50['text_'][8968] ="The product worked right away and significantly enhanced home security. The only downside is that it takes some time to get accustomed to using it."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][8968] ="The product worked right away and significantly enhanced home security. The only downside is that it takes some time to get accustomed to using it."


In [133]:
test_dataset_raw_0_50['text_'][32790] ="This product works exceptionally well! The material is thick and durable, and the pieces are sturdy, capable of enduring the wear and tear of... "

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][32790] ="This product works exceptionally well! The material is thick and durable, and the pieces are sturdy, capable of enduring the wear and tear of... "


In [135]:
test_dataset_raw_0_50['text_'][26048] ="I thoroughly enjoyed reading this P.J. Parrish book, and it was my first experience with this author's work. I loved it so much that I have already bought her other books, and I'm eagerly looking forward to reading them. I'm very excited about diving into more of her writing."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][26048] ="I thoroughly enjoyed reading this P.J. Parrish book, and it was my first experience with this author's work. I loved it so much that I have already bought her other books, and I'm eagerly looking forward to reading them. I'm very excited about diving into more of her writing."


In [137]:
test_dataset_raw_0_50['text_'][10131] ="Adding an external HDD to your computer is incredibly easy with this product; all you have to do is plug it in. I had to replace a malfunctioning solid-state drive that wasn't compatible with the other drives I had. If you're considering making a purchase..."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][10131] ="Adding an external HDD to your computer is incredibly easy with this product; all you have to do is plug it in. I had to replace a malfunctioning solid-state drive that wasn't compatible with the other drives I had. If you're considering making a purchase..."


In [139]:
test_dataset_raw_0_50['text_'][28084] ="I thoroughly enjoy a good SEAL story, and this book didn't disappoint. The characters are well-developed and believable, and the writing is solid. I finished reading it in just a few days and found the plot to be strong, leading to a satisfying ending. The author also excels in describing the most common type of dog and its characteristics, making the book an enjoyable, quick, and easy read. I read the entire story in one sitting and found it captivating; I'm a fan of Ms. King's writing and look forward to more from her. Overall, it's a great start to a series, and I highly recommend this book, especially to those who appreciate westerns and seek an entertaining, quick read. The characters and plot are well-written, and the humor adds to the enjoyment of the story."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][28084] ="I thoroughly enjoy a good SEAL story, and this book didn't disappoint. The characters are well-developed and believable, and the writing is solid. I finished reading it in just a few days and found the plot to be strong, leading to a satisfying ending. The author also excels in describing the most common type of dog and its characteristics, making the book an enjoyable, quick, and easy read. I read the entire story in one sitting and found it captivating; I'm a fan of Ms. King's writing and look forward to more from her. Overall, it's a great start to a series, and I highly recommend this book, especially to those who appreciate westerns and seek an entertaining, quick read. The characters and plot are well-written, and the humor adds to the 

In [141]:
test_dataset_raw_0_50['text_'][6407] ="I am now interested in getting these in a larger size, as I have found them to be quite useful. I even used them for a few hours in a cave, and I am impressed with the solid seams and pleasant texture. So far, I haven't experienced any problems with the seams becoming loose."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_50['text_'][6407] ="I am now interested in getting these in a larger size, as I have found them to be quite useful. I even used them for a few hours in a cave, and I am impressed with the solid seams and pleasant texture. So far, I haven't experienced any problems with the seams becoming loose."


In [145]:
merged_df_gpt = pd.concat([test_dataset_raw_0_50, test_dataset_raw_1_100], ignore_index=True)

# Shuffle the index
test_150_gpt = merged_df_gpt.sample(frac=1, random_state=42).reset_index(drop=True)

In [146]:
test_150_gpt

Unnamed: 0,category,rating,label,text_,target
0,Pet_Supplies_5,3.0,OR,"I know this works great for other dogs, but it...",1
1,Kindle_Store_5,5.0,CG,I derive immense pleasure from reading this se...,0
2,Clothing_Shoes_and_Jewelry_5,5.0,OR,This turned out to be a great jacket. This ja...,1
3,Kindle_Store_5,5.0,OR,It is a very nice story. It is funny and lovin...,1
4,Kindle_Store_5,5.0,OR,"I picked this up, expecting just another short...",1
...,...,...,...,...,...
145,Pet_Supplies_5,4.0,OR,"Oh, gosh, I can't believe it's taken me this l...",1
146,Clothing_Shoes_and_Jewelry_5,5.0,OR,Been wearing 501s since the late 1970s - can't...,1
147,Toys_and_Games_5,5.0,CG,This doll is absolutely adorable! She is incre...,0
148,Toys_and_Games_5,5.0,OR,These were a fun addition to the goody bags fo...,1


In [147]:
test_encodings_gpt = tokenizer(test_150_gpt['text_'].tolist(), truncation=True, padding=True, max_length=256)

test_dataset_gpt = tf.data.Dataset.from_tensor_slices((
    dict(test_encodings_gpt),
    test_150_gpt['target'].values
))


In [148]:
test_dataset_gpt_batched = test_dataset_gpt.batch(16)
y_pred_raw_gpt = model.predict(test_dataset_gpt_batched)

# Convert raw predictions to class predictions
y_pred_gpt = np.argmax(y_pred_raw_gpt.logits, axis=1)

# Get the actual class labels
y_true_gpt = []
for features, label in test_dataset_gpt:
    y_true_gpt.append(label.numpy())
y_true_gpt = np.array(y_true_gpt)


# Generate a classification report
report_gpt = classification_report(y_true_gpt, y_pred_gpt, target_names=["CG", "OR"])  # replace target_names with your actual class names
print(report_gpt)

              precision    recall  f1-score   support

          CG       0.87      0.68      0.76        50
          OR       0.86      0.95      0.90       100

    accuracy                           0.86       150
   macro avg       0.86      0.81      0.83       150
weighted avg       0.86      0.86      0.85       150



# chatgpt4

In [149]:
random_25_rows = test_dataset_raw_0.sample(n=25)

# Build the new DataFrame using the randomly selected rows
test_dataset_raw_0_25 = pd.DataFrame(random_25_rows)

In [151]:
test_dataset_raw_0_25.index

Int64Index([17408,  9913,  7820, 27662, 26833,  7804, 21469, 18647, 15587,
            39912, 17068,  2006, 22140, 33467, 22840, 35898, 30798, 25185,
            17279,  4867, 22505, 11525, 25079, 36858, 37468],
           dtype='int64')

In [200]:
test_dataset_raw_0_25['text_'][37468]

'Fits true to size and looks great.  It is a little snug for my small size,'

In [153]:
test_dataset_raw_0_25['text_'][17408] ="Fantastic lights! Kudos to both the manufacturer and the seller! The quality is top-notch. It's a charming, compact light that functions perfectly. The pricing is also remarkably reasonable. The light unit itself is excellent and performs brilliantly."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][17408] ="Fantastic lights! Kudos to both the manufacturer and the seller! The quality is top-notch. It's a charming, compact light that functions perfectly. The pricing is also remarkably reasonable. The light unit itself is excellent and performs brilliantly."


In [155]:
test_dataset_raw_0_25['text_'][9913] ="The item functions perfectly. However, the description doesn't include details about the shipping box. I'm planning to return this item. Despite this, it's an impressive product. I purchased this for a friend who is extremely satisfied with it. The only aspect I would like to change is..."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][9913] ="The item functions perfectly. However, the description doesn't include details about the shipping box. I'm planning to return this item. Despite this, it's an impressive product. I purchased this for a friend who is extremely satisfied with it. The only aspect I would like to change is..."


In [157]:
test_dataset_raw_0_25['text_'][7820] = "This is an essential item for anyone wanting to experiment with a new bat for the first time. I had a friend who was interested in a bat at a local store that was similar to one we'd used for batting practice. That bat performed exceptionally well, and we absolutely loved it. Despite being slightly on the pricier side, it met our requirements perfectly and was worth the investment. We recently went to Walmart and purchased a new bat. The cost was $30 less than the previous one, which delighted us. The bat was a tad heavy, so we stored it in a bag and placed it in a closet. Despite initial difficulties fitting it into our cooler, it ended up being a good fit and we used it for our batting practice. It's a superb bat, and we're pleased with our purchase. I would recommend it to anyone considering buying a bat. In addition, we acquired a tent for our boat journey along Mexico's east coast. It was astounding how warm and comfortable it was. The tent was not only spacious but also simple to set up. It even allowed us to sleep inside while we were out on the water, with a clear view of the water."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][7820] = "This is an essential item for anyone wanting to experiment with a new bat for the first time. I had a friend who was interested in a bat at a local store that was similar to one we'd used for batting practice. That bat performed exceptionally well, and we absolutely loved it. Despite being slightly on the pricier side, it met our requirements perfectly and was worth the investment. We recently went to Walmart and purchased a new bat. The cost was $30 less than the previous one, which delighted us. The bat was a tad heavy, so we stored it in a bag and placed it in a closet. Despite initial difficulties fitting it into our cooler, it ended up being a good fit and we used it for our batting practice. It's a superb bat, and we're pleased with our

In [159]:
test_dataset_raw_0_25['text_'][27662] = "I found the book incredibly captivating. The characters were skillfully fleshed out, making the storyline quite enjoyable. I received an advance reader's copy in return for an unbiased review. This book is a gem; the characters and plot are beautifully crafted. It serves as an excellent series for the younger audience. Having read the initial book in the series, I can vouch for the overall quality of the series. I strongly recommend this book. This book was a pleasure to read. I am a fan of good romance, and this had a hint of mystery, enhancing the overall story. This book was appealing, though initially, there was a bit of confusion. However, once I overcame the initial ambiguity, the read was worthwhile. I'm eagerly anticipating that the upcoming books in this series will follow suit. This charming tale revolves around a young man and a girl named Belinda who..."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][27662] = "I found the book incredibly captivating. The characters were skillfully fleshed out, making the storyline quite enjoyable. I received an advance reader's copy in return for an unbiased review. This book is a gem; the characters and plot are beautifully crafted. It serves as an excellent series for the younger audience. Having read the initial book in the series, I can vouch for the overall quality of the series. I strongly recommend this book. This book was a pleasure to read. I am a fan of good romance, and this had a hint of mystery, enhancing the overall story. This book was appealing, though initially, there was a bit of confusion. However, once I overcame the initial ambiguity, the read was worthwhile. I'm eagerly anticipating that the 

In [161]:
test_dataset_raw_0_25['text_'][26833] = "This is the sequel in The Crystal Queen series. The Crystal Queen series has held a special place in my reading list for the past few years, and I'm confident that I'll continue to explore more of her works in the future. This book was an absolute delight! A friend gifted me this book in return for a candid review. I found it marvelous, brimming with compelling characters and unexpected plot twists. I heartily endorse it! It was..."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][26833] = "This is the sequel in The Crystal Queen series. The Crystal Queen series has held a special place in my reading list for the past few years, and I'm confident that I'll continue to explore more of her works in the future. This book was an absolute delight! A friend gifted me this book in return for a candid review. I found it marvelous, brimming with compelling characters and unexpected plot twists. I heartily endorse it! It was..."


In [163]:
test_dataset_raw_0_25['text_'][7804] ="The only reason I rated this a 3-star is because I didn't want to unduly skew the ratings. It may not be the best in terms of design, but it's undoubtedly a superior product. I also gave it 4 stars at one point, not because I desired to use it, but rather I needed it to kickstart my day. Despite its effectiveness, I didn't intend to use it for anything else. My curiosity was mainly around whether it could perform that specific task, and it did. However, it's challenging to always ascertain if you're making a mistake. It's a remarkable product, and I plan to purchase more of it. I'll certainly retain it for my wife. Regarding the tent, I've been using it for several years without encountering any issues. I employed this tent during a boat ride in the summer of 2012, and its durability has been impressive. The tent boasts a substantial footprint and appears to be well-constructed. I'd recommend it to anyone looking to establish a tent on a boat. I've used this tent on a few other boats..."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][7804] ="The only reason I rated this a 3-star is because I didn't want to unduly skew the ratings. It may not be the best in terms of design, but it's undoubtedly a superior product. I also gave it 4 stars at one point, not because I desired to use it, but rather I needed it to kickstart my day. Despite its effectiveness, I didn't intend to use it for anything else. My curiosity was mainly around whether it could perform that specific task, and it did. However, it's challenging to always ascertain if you're making a mistake. It's a remarkable product, and I plan to purchase more of it. I'll certainly retain it for my wife. Regarding the tent, I've been using it for several years without encountering any issues. I employed this tent during a boat ride 

In [165]:
test_dataset_raw_0_25['text_'][21469] ="This product shields my kittens from the odor of the litter box. Additionally, I appreciate that it's one of the initial items I've purchased for my kitty that we're not constantly having to remove and..."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][21469] ="This product shields my kittens from the odor of the litter box. Additionally, I appreciate that it's one of the initial items I've purchased for my kitty that we're not constantly having to remove and..."


In [167]:
test_dataset_raw_0_25['text_'][18647] ="The Bosch template adapters can be quite a challenge to locate. I find myself needing to switch them off and on. I replaced the original adapters with these ones, which was a surprisingly straightforward task. They continue to function excellently, leaving me very satisfied. I purchased this item to replace a damaged one I previously owned. I appreciate the ability to use the hose to clear the ballast from my sink. After a few installations, it has proven to be the ideal replacement for the broken one. I'm now planning to..."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][18647] ="The Bosch template adapters can be quite a challenge to locate. I find myself needing to switch them off and on. I replaced the original adapters with these ones, which was a surprisingly straightforward task. They continue to function excellently, leaving me very satisfied. I purchased this item to replace a damaged one I previously owned. I appreciate the ability to use the hose to clear the ballast from my sink. After a few installations, it has proven to be the ideal replacement for the broken one. I'm now planning to..."


In [169]:
test_dataset_raw_0_25['text_'][15587] ="Fantastic tool from an impressive toolbox. The only issue is that it includes a miniature screwdriver."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][15587] ="Fantastic tool from an impressive toolbox. The only issue is that it includes a miniature screwdriver."


In [171]:
test_dataset_raw_0_25['text_'][39912] ="I am the proud owner of an OMAPC, and the accompanying knee pads are a perfect fit. The heel size is just right, and the material is pleasantly soft and comfy. However, I encountered an issue with the material being excessively loose. Consequently, I'll be returning it. I opted for a size up on reorder, and it fitted perfectly. I had to wash the boots on the first day, but they're incredibly comfortable. I've yet to wear them for an extended period, but finding boots that remain comfortable for long durations has always been a challenge. For reference, I'm 5'4 and weigh 140lbs, and these..."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][39912] ="I am the proud owner of an OMAPC, and the accompanying knee pads are a perfect fit. The heel size is just right, and the material is pleasantly soft and comfy. However, I encountered an issue with the material being excessively loose. Consequently, I'll be returning it. I opted for a size up on reorder, and it fitted perfectly. I had to wash the boots on the first day, but they're incredibly comfortable. I've yet to wear them for an extended period, but finding boots that remain comfortable for long durations has always been a challenge. For reference, I'm 5'4 and weigh 140lbs, and these..."


In [173]:
test_dataset_raw_0_25['text_'][17068] ="The quality is appalling. Do not squander valuable metal or steel on it. The sole reason I purchased this item was to replace a faulty..."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][17068] ="The quality is appalling. Do not squander valuable metal or steel on it. The sole reason I purchased this item was to replace a faulty..."


In [175]:
test_dataset_raw_0_25['text_'][2006] ="Despite its modest size, this machine makes a sound investment. The only drawback is its compact size. The components needed to attach the cover at the top were..."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][2006] ="Despite its modest size, this machine makes a sound investment. The only drawback is its compact size. The components needed to attach the cover at the top were..."


In [177]:
test_dataset_raw_0_25['text_'][22140] ="I own two small breed dogs and felt this would be a beneficial enhancement for the litter box. The top comes in a striking bright color and is simple to clean. I used this tool to tidy up my older cat's litter box, although it didn't include a large filter."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][22140] ="I own two small breed dogs and felt this would be a beneficial enhancement for the litter box. The top comes in a striking bright color and is simple to clean. I used this tool to tidy up my older cat's litter box, although it didn't include a large filter."


In [179]:
test_dataset_raw_0_25['text_'][33467] ="Delightfully soft and plush. My daughter adores it, and her two-year-old sister shares the same sentiment."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][33467] ="Delightfully soft and plush. My daughter adores it, and her two-year-old sister shares the same sentiment."


In [181]:
test_dataset_raw_0_25['text_'][22840] ="Both my Aussie and my African Grey are fond of these! I plan to keep them stocked for the next few months. This food is a big hit with my dogs. My dog consumes it leisurely, first from a deep, slow-feed bowl, then from a larger bowl. I own two dogs, each weighing 4 lbs., and one of them is quite finicky about food. I've purchased a second bag to use for her vet visits. The two bags I've received so far are strikingly similar."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][22840] ="Both my Aussie and my African Grey are fond of these! I plan to keep them stocked for the next few months. This food is a big hit with my dogs. My dog consumes it leisurely, first from a deep, slow-feed bowl, then from a larger bowl. I own two dogs, each weighing 4 lbs., and one of them is quite finicky about food. I've purchased a second bag to use for her vet visits. The two bags I've received so far are strikingly similar."


In [183]:
test_dataset_raw_0_25['text_'][35898] ="I was taken aback by the level of detail in the figures. The Batman figure is notably impressive, although the figure itself is somewhat diminutive, and the cover seems quite fragile. I wouldn't advise anyone to purchase this figure. Nonetheless, it remains an entertaining toy to play with and makes a fine addition to any collection. This product is exceptionally well-crafted. My daughter has engaged with it for approximately two months now and continues to enjoy it immensely. I'm extremely satisfied with this purchase. My son is nearly 3, and..."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][35898] ="I was taken aback by the level of detail in the figures. The Batman figure is notably impressive, although the figure itself is somewhat diminutive, and the cover seems quite fragile. I wouldn't advise anyone to purchase this figure. Nonetheless, it remains an entertaining toy to play with and makes a fine addition to any collection. This product is exceptionally well-crafted. My daughter has engaged with it for approximately two months now and continues to enjoy it immensely. I'm extremely satisfied with this purchase. My son is nearly 3, and..."


In [185]:
test_dataset_raw_0_25['text_'][30798] ="I went through the whole book, and it proved to be a fascinating read. The narrative commences when Kary, one of the two girls, is assaulted by an enigmatic character. The abductor is portrayed as a devilish figure. Interestingly, Kary's father held the position of a priest, and strikingly, so did the kidnapper."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][30798] ="I went through the whole book, and it proved to be a fascinating read. The narrative commences when Kary, one of the two girls, is assaulted by an enigmatic character. The abductor is portrayed as a devilish figure. Interestingly, Kary's father held the position of a priest, and strikingly, so did the kidnapper."


In [187]:
test_dataset_raw_0_25['text_'][25185] ="This author is a new discovery for me. I found her work captivating and am eager to delve into more of her books. I received an advance reader's copy in return for my candid review. I'm smitten with this book."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][25185] ="This author is a new discovery for me. I found her work captivating and am eager to delve into more of her books. I received an advance reader's copy in return for my candid review. I'm smitten with this book."


In [189]:
test_dataset_raw_0_25['text_'][17279] = "These are indeed very impressive - a high-quality tool set. I plan to retain them for the next few years. They were a bargain at a fantastic price! They function excellently. The light is somewhat brighter..."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][17279] = "These are indeed very impressive - a high-quality tool set. I plan to retain them for the next few years. They were a bargain at a fantastic price! They function excellently. The light is somewhat brighter..."


In [191]:
test_dataset_raw_0_25['text_'][4867] ="This clock is a must-have! I absolutely adore the texture of it."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][4867] ="This clock is a must-have! I absolutely adore the texture of it."


In [193]:
test_dataset_raw_0_25['text_'][22505] ="I'm a big fan of this kit. My puppy, being an intense chewer, makes quick work of it, and I love how quickly I can toss this thing. I've been using it along with the Tasers on a 4lb bag of dog food, and she doesn't object to it at all. I keep another 4 lb bag of dog food handy, which is excellent for her to carry around in her mouth. She's absolutely wild about this."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][22505] ="I'm a big fan of this kit. My puppy, being an intense chewer, makes quick work of it, and I love how quickly I can toss this thing. I've been using it along with the Tasers on a 4lb bag of dog food, and she doesn't object to it at all. I keep another 4 lb bag of dog food handy, which is excellent for her to carry around in her mouth. She's absolutely wild about this."


In [195]:
test_dataset_raw_0_25['text_'][11525] ="The arm functions excellently. I use it as an extended cable for my computer and also have one in my bedroom. The cable is robust and easy to operate. I would certainly recommend it as a valuable addition to your computer setup. I acquired this device with the intention of utilizing it as a family-friendly DVD player. I own a Samsung Galaxy S4 and a Sony PS3. The Sony is the only device among them that I haven't tested yet. I wanted to make my Galaxy S4 compatible with my Samsung, but I was concerned that a lengthy cord might reduce its lifespan. I came across the Sony on a sale at Best Buy and decided to give it a shot. It doesn't feature a cord but does offer a USB port, which is incredibly useful for transferring DVD files."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][11525] ="The arm functions excellently. I use it as an extended cable for my computer and also have one in my bedroom. The cable is robust and easy to operate. I would certainly recommend it as a valuable addition to your computer setup. I acquired this device with the intention of utilizing it as a family-friendly DVD player. I own a Samsung Galaxy S4 and a Sony PS3. The Sony is the only device among them that I haven't tested yet. I wanted to make my Galaxy S4 compatible with my Samsung, but I was concerned that a lengthy cord might reduce its lifespan. I came across the Sony on a sale at Best Buy and decided to give it a shot. It doesn't feature a cord but does offer a USB port, which is incredibly useful for transferring DVD files."


In [197]:
test_dataset_raw_0_25['text_'][25079] ="I'm uncertain about the origins of some individuals, but comprehending the events that transpired is crucial.I found considerable enjoyment in reading this book and I..."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][25079] ="I'm uncertain about the origins of some individuals, but comprehending the events that transpired is crucial.I found considerable enjoyment in reading this book and I..."


In [199]:
test_dataset_raw_0_25['text_'][36858] ="It has an odd fit, tight around the thighs. I'm unsure if the knee section is considerably tighter, but..."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][36858] ="It has an odd fit, tight around the thighs. I'm unsure if the knee section is considerably tighter, but..."


In [201]:
test_dataset_raw_0_25['text_'][37468] ="It fits accurately to size and has an appealing appearance. However, for my petite frame, it's slightly tight."

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dataset_raw_0_25['text_'][37468] ="It fits accurately to size and has an appealing appearance. However, for my petite frame, it's slightly tight."


In [202]:
merged_df_120 = pd.concat([test_dataset_raw_0_25, test_dataset_raw_1_100], ignore_index=True)

# Shuffle the index
test_120 = merged_df_120.sample(frac=1, random_state=42).reset_index(drop=True)

In [203]:
test_encodings_120 = tokenizer(test_120['text_'].tolist(), truncation=True, padding=True, max_length=256)

test_dataset_120 = tf.data.Dataset.from_tensor_slices((
    dict(test_encodings_120),
    test_120['target'].values
))

In [215]:
test_dataset_120

<_TensorSliceDataset element_spec=({'input_ids': TensorSpec(shape=(256,), dtype=tf.int32, name=None), 'attention_mask': TensorSpec(shape=(256,), dtype=tf.int32, name=None)}, TensorSpec(shape=(), dtype=tf.int64, name=None))>

In [204]:
test_dataset_120_batched = test_dataset_120.batch(16)
y_pred_raw_120 = model.predict(test_dataset_120_batched)

# Convert raw predictions to class predictions
y_pred_120 = np.argmax(y_pred_raw_120.logits, axis=1)

# Get the actual class labels
y_true_120 = []
for features, label in test_dataset_120:
    y_true_120.append(label.numpy())
y_true_120 = np.array(y_true_120)


# Generate a classification report
report_120 = classification_report(y_true_120, y_pred_120, target_names=["CG", "OR"])  # replace target_names with your actual class names
print(report_120)

              precision    recall  f1-score   support

          CG       0.69      0.44      0.54        25
          OR       0.87      0.95      0.91       100

    accuracy                           0.85       125
   macro avg       0.78      0.69      0.72       125
weighted avg       0.83      0.85      0.83       125



# GPT 4 with contents generating

In [205]:

# Dictionary with your texts and target values
data = {
    "text_": [
        "This kitchen blender is not only a perfect companion for creating delightful smoothies but also an elegant addition to any kitchen countertop.",
        "The comfort and sound quality of these headphones are nothing short of impressive. The noise cancellation feature allows for an immersive experience.",
        "This book is an absolute page-turner. The storyline is intriguing, the characters are well-developed, and the author's writing style is compelling.",
        "I was thoroughly impressed with this smartphone. It offers excellent performance, a crisp display, and a battery that lasts all day.",
        "This cookware set exceeded my expectations. It's not only aesthetically pleasing but also has an amazing non-stick surface and heats evenly.",
        "These running shoes are fantastic. They offer great support, are lightweight, and incredibly comfortable.",
        "The soundbar provides clear, room-filling sound. The setup was straightforward, and the Bluetooth connectivity is a great feature.",
        "This backpack is not only stylish but also functional. It has a generous storage capacity, and the material is durable.",
        "The vacuum cleaner is extremely efficient. The powerful suction is a plus, and it maneuvers around furniture effortlessly.",
        "The camping tent is easy to set up and spacious. The material is durable, withstanding various weather conditions.",
        "This pet bed is a hit with my dog. It's plush and comfortable, providing a cozy spot for her to rest.",
        "The keyboard delivers an enjoyable typing experience. The keys are responsive and comfortable to use for prolonged periods.",
        "This skincare product has transformed my skin. It's gentle, yet effective, leaving my skin feeling soft, refreshed, and visibly brighter.",
        "These wireless earbuds have an impressive sound quality, a comfortable fit, and the battery life is long-lasting.",
        "This yoga mat is excellent. It provides enough cushioning, has a non-slip surface, and is easy to clean.",
        "I love this coffee maker. It brews coffee quickly, it's easy to use, and the coffee tastes great.",
        "The slow cooker is a gem. It's easy to operate, the food comes out delicious, and it's easy to clean.",
        "This facial cleanser is wonderful. It cleanses deeply, leaving the skin feeling fresh and clean without drying it out.",
        "The fitness tracker is an excellent workout companion. It's accurate, easy to use, and the app provides insightful data.",
        "This bedsheet set is fantastic. It's incredibly soft, comfortable, and the color hasn't faded after several washes.",
        "This nail polish is amazing. The color is vibrant, it applies smoothly, and it has a long-lasting finish.",
        "This computer mouse is reliable and comfortable to use. It moves smoothly and the click is quiet.",
        "The baby monitor gives peace of mind. The video is clear, the audio is crisp, and the range is good.",
        "This electric toothbrush cleans teeth thoroughly. It has different modes for different needs, and the battery life is good.",
        "This lawn mower is a workhorse. It's powerful, easy to maneuver, and the cut is even."
    ],
    "target": [0] * 25
}

# Create DataFrame
test_dataset_raw_1_25 = pd.DataFrame(data)




In [206]:
test_dataset_raw_1_25

Unnamed: 0,text_,target
0,This kitchen blender is not only a perfect com...,0
1,The comfort and sound quality of these headpho...,0
2,This book is an absolute page-turner. The stor...,0
3,I was thoroughly impressed with this smartphon...,0
4,This cookware set exceeded my expectations. It...,0
5,These running shoes are fantastic. They offer ...,0
6,"The soundbar provides clear, room-filling soun...",0
7,This backpack is not only stylish but also fun...,0
8,The vacuum cleaner is extremely efficient. The...,0
9,The camping tent is easy to set up and spaciou...,0


In [208]:
test_dataset_raw_2_100 = test_dataset_raw_1_100[['text_','target']]

In [209]:
test_dataset_raw_2_100

Unnamed: 0,text_,target
11323,Are people really dum enough to pay this for t...,1
2591,These are great. They are a bit smaller than I...,1
7051,These are really good cycling gloves. I have h...,1
17807,This painter's tape has a strong stick. This i...,1
30082,Her books are truly laugh our loud funny. I lo...,1
...,...,...
5415,Very good dynamic range. Comfortable to wear w...,1
12067,About half way through with the season and it'...,1
10087,Item works as advertised. Good for baby monito...,1
30954,Its been a while since Ive read a Grisham. And...,1


In [210]:
merged_df_120_2 = pd.concat([test_dataset_raw_1_25, test_dataset_raw_2_100], ignore_index=True)

# Shuffle the index
test_120_2 = merged_df_120_2.sample(frac=1, random_state=42).reset_index(drop=True)

In [211]:
test_120_2

Unnamed: 0,text_,target
0,The fitness tracker is an excellent workout co...,0
1,Connections are light duty and can break as mi...,1
2,I fell in love with the illustrations in this ...,1
3,great for the price even came in a back pack t...,1
4,It is a very nice story. It is funny and lovin...,1
...,...,...
120,"Unfortunately, the version sent to me was appa...",1
121,This yoga mat is excellent. It provides enough...,0
122,This cable arrived DOA. It doesn't appear dama...,1
123,"I picked this up, expecting just another short...",1


In [212]:
test_encodings_120_2 = tokenizer(test_120_2['text_'].tolist(), truncation=True, padding=True, max_length=256)

test_dataset_120_2 = tf.data.Dataset.from_tensor_slices((
    dict(test_encodings_120_2),
    test_120_2['target'].values
))

In [216]:
test_dataset_120_2_batched = test_dataset_120_2.batch(16)
y_pred_raw_120_2 = model.predict(test_dataset_120_2_batched)

# Convert raw predictions to class predictions
y_pred_120_2 = np.argmax(y_pred_raw_120_2.logits, axis=1)

# Get the actual class labels
y_true_120_2 = []
for features, label in test_dataset_120_2:
    y_true_120_2.append(label.numpy())
y_true_120_2 = np.array(y_true_120_2)


# Generate a classification report
report_120_2 = classification_report(y_true_120_2, y_pred_120_2, target_names=["CG", "OR"])
print(report_120_2)

              precision    recall  f1-score   support

          CG       0.76      0.64      0.70        25
          OR       0.91      0.95      0.93       100

    accuracy                           0.89       125
   macro avg       0.84      0.79      0.81       125
weighted avg       0.88      0.89      0.88       125

