## Load all files from model folder

In [328]:
# Version number of the saved model to evaluate; it is used to build the
# model folder name (model_v<version>).
version = 5

In [329]:
# Folder that holds the saved artefacts for this version: the Keras model,
# the tokenizer configuration and the pickled training metadata.
model_folder = f"model_v{version}"

## Load model

In [330]:
from tensorflow import keras

# Restore the trained classifier from the HDF5 file stored in the model folder.
model = keras.models.load_model(f'{model_folder}/model.h5')

## Load tokenizer

In [331]:
from tensorflow.keras.preprocessing.text import tokenizer_from_json
import json  # NOTE(review): not referenced in this cell; the JSON text is passed on as a raw string

# Read the saved tokenizer configuration as raw JSON text
with open(f'{model_folder}/tokenizer_config.json', 'r') as json_file:
    tokenizer_config_str = json_file.read()

# Rebuild the tokenizer instance from that JSON string
tokenizer = tokenizer_from_json(tokenizer_config_str)


## Load the training data

In [332]:
import pickle

# Path of the pickled metadata dictionary stored next to the model
pickle_file_path = f'{model_folder}/training_data.pkl'

# Load the training metadata dictionary from the pickle file.
# SECURITY NOTE: pickle.load can execute arbitrary code while unpickling;
# only load files from the model folder if they come from a trusted source.
with open(pickle_file_path, 'rb') as pickle_file:
    loaded_training_data = pickle.load(pickle_file)

# Unpack the entries used by the rest of the notebook:
#   max_sequence           -> padding length passed to pad_sequences in predict()
#   labels_legend_inverted -> maps the stringified class index to its label name
#   max_words, legend      -> loaded here but not used by the cells shown below
max_words = loaded_training_data['max_words']
max_sequence = loaded_training_data['max_sequence']
legend = loaded_training_data['legend']
labels_legend_inverted = loaded_training_data['labels_legend_inverted']

In [333]:
# Display the loaded metadata dictionary to verify its contents
loaded_training_data

{'max_words': 15,
 'max_sequence': 15,
 'legend': {'No': 0, 'Yes': 1},
 'labels_legend_inverted': {'0': 'No', '1': 'Yes'}}

## define the stem function

In [334]:
import nltk
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
nltk.download('punkt')  # Download the punkt tokenizer if you haven't already

def stem_text(text):
    """Return ``text`` with every token reduced to its Porter stem.

    The text is split with ``word_tokenize``, each token is stemmed with a
    ``PorterStemmer`` and the stems are joined back with single spaces.
    """
    porter = PorterStemmer()
    return ' '.join(porter.stem(token) for token in word_tokenize(text))

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\pc\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## define the prediction function

In [335]:
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

def predict(text_str, max_sequence=max_sequence, tokenizer=None, model=None, labels_legend_inverted=None):
    """Classify one text and return the winning label together with its score.

    The text is stemmed with ``stem_text``, turned into an integer sequence by
    ``tokenizer.texts_to_sequences``, passed through ``pad_sequences`` with
    ``maxlen=max_sequence`` and finally fed to ``model.predict``.

    Args:
        text_str: Raw text to classify.
        max_sequence: ``maxlen`` handed to ``pad_sequences``. The default is the
            value of the module-level ``max_sequence`` at the moment this
            function is defined.
        tokenizer: Object providing ``texts_to_sequences``.
        model: Object providing ``predict``.
        labels_legend_inverted: Mapping from the stringified class index
            (e.g. ``'0'``) to the label name.

    Returns:
        ``None`` if ``tokenizer``, ``model`` or ``labels_legend_inverted`` is
        missing (falsy). Otherwise a tuple ``(label, {label: score})`` where
        ``label`` is the class with the highest model output and ``score`` is
        that output converted to ``float``. Callers that unpack the result
        into two names must therefore supply all three objects.
    """
    # Nothing can be predicted unless all three collaborators are supplied
    if not (tokenizer and model and labels_legend_inverted):
        return None

    # Apply the stemming step before tokenizing
    stemmed = stem_text(text_str)

    # Text -> integer sequence -> padded model input
    encoded = tokenizer.texts_to_sequences([stemmed])
    padded = pad_sequences(encoded, maxlen=max_sequence)

    # Model outputs for the single input row
    scores = model.predict(padded, verbose=0)

    # Pick the class with the highest output and translate it to its label
    best_index = np.argmax(scores, axis=-1)[0]
    best_label = labels_legend_inverted[str(best_index)]
    best_score = float(scores[0][best_index])

    return best_label, {best_label: best_score}




## Example usage

In [336]:
# Example usage: classify a handful of sample product titles and print each
# title followed by the predicted label and its score.
example_messages = [
    "best drugs 5ml",
    "gun",
    "Dearjoyee sex dolls for men silicon",
    "10 Modes Clitoral Stimulator Female Masturbator Bullet Vibrator Sex Toys Makeup Brush Vibrator",
    "mouse gaming",
    "sexy",
    "Wholesale Cheap Price Sex Products Adult Toy Female Clitoris Vibrator Silicone G Spot Rabbit Vibrator Sex Toy For Women",
]

# One loop replaces the previously copy-pasted predict/print stanzas; the
# printed output and the final values of msg, y_pred and prediction are the same.
for msg in example_messages:
    y_pred, prediction = predict(msg, tokenizer=tokenizer, model=model, labels_legend_inverted=labels_legend_inverted)
    print(msg)
    print(y_pred, prediction)
    print()

best drugs 5ml
No {'No': 0.999881386756897}

gun
No {'No': 0.8199013471603394}

Dearjoyee sex dolls for men silicon
No {'No': 0.978752076625824}

10 Modes Clitoral Stimulator Female Masturbator Bullet Vibrator Sex Toys Makeup Brush Vibrator
No {'No': 0.9999982118606567}

mouse gaming
Yes {'Yes': 0.8632659316062927}

sexy
No {'No': 0.9900497198104858}

Wholesale Cheap Price Sex Products Adult Toy Female Clitoris Vibrator Silicone G Spot Rabbit Vibrator Sex Toy For Women
No {'No': 0.9999967813491821}



In [205]:
# Check how the stemmer rewrites a single word (the tokenizer sees the stemmed form).
# NOTE(review): this cell's execution count is out of sequence with its neighbours,
# so its recorded output comes from an earlier run.
stem_text("sexy")

'sexi'

## test the model

In [337]:
import pandas as pd
# Load the labelled product titles.
# NOTE(review): the trailing comment presumably names an alternative/earlier data path — confirm.
data = pd.read_csv('data/data.txt') #model_v3/data.csv

# Drop rows whose label cell contains the column name itself
# (presumably header lines repeated inside the file — TODO confirm).
data = data[data["Is Accepted Policy"]!="Is Accepted Policy"]
# Keep only the first occurrence of each product name
data = data.drop_duplicates(subset='Name of Product', keep='first')

# Display the cleaned frame
data

Unnamed: 0,Name of Product,Is Accepted Policy
0,Ultra-Thin Gaming Laptop with RGB Backlit Keyb...,Yes
1,Deluxe Espresso Machine with Integrated Milk F...,Yes
2,Advanced Running Shoes with Responsive Cushion...,Yes
3,Latest iPhone Pro Max with Triple-Camera Syste...,Yes
4,Next-Gen Gaming Console with 4K Graphics and E...,Yes
...,...,...
10343,Hot Selling Aluminum Alloy Portable Lightweigh...,No
10344,High Quality Stainless Steel Kitchen knife Wit...,No
10345,New Arrival Folding Blade Knives Outdoor Survi...,No
10346,16212 Komioh 162 cm mature pussy 13cm 18cm 32c...,No


In [338]:
from sklearn.model_selection import train_test_split

# Product titles are the inputs, the 'Yes'/'No' policy flags are the targets
X = data["Name of Product"].tolist()
y = data["Is Accepted Policy"].tolist()
# Hold out 20% of the rows with a fixed seed so the split is repeatable.
# NOTE(review): the model is loaded from disk, not trained here; whether this
# split matches the one used at training time is not visible in this notebook.
# If it does not, X_test may contain titles the model was trained on and the
# metrics below would be optimistic — TODO confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [339]:
# Sanity check: the first-733 slice of X_test must contain 733 items, because
# the evaluation loop below indexes X_test[0] .. X_test[732].
len(X_test[:733])

733

In [340]:
def convert_labels(label):
    """Encode a policy label as an integer: 1 for 'Yes', 0 for anything else."""
    if label == 'Yes':
        return 1
    return 0

# Encode the ground-truth 'Yes'/'No' labels of the test split as 1/0
y_test_binary = list(map(convert_labels, y_test))

# Encoded predictions, collected in the same order as X_test
y_pred_binary = []

# Score the first 733 test titles one at a time
for i in range(733):#len(X_test)
    y_pred, _ = predict(
        X_test[i],
        tokenizer=tokenizer,
        model=model,
        labels_legend_inverted=labels_legend_inverted,
    )
    y_pred_binary += [convert_labels(y_pred)]


In [341]:
# Sanity check: the ground-truth slice used for the metrics below has the same
# length (733) as the list of predictions produced by the loop above.
len(y_test_binary[:733])

733

In [342]:
from sklearn.metrics import confusion_matrix

# Compare the encoded ground truth of the first 733 test samples with the
# predictions collected for those same samples (0 = 'No', 1 = 'Yes').
# In the resulting matrix rows are true labels and columns are predicted labels.
print("confusion matrix:")
confusion_matrix(y_test_binary[:733], y_pred_binary)

confusion matrix:


array([[542,  29],
       [  5, 157]], dtype=int64)

In [343]:
from sklearn.metrics import classification_report

# Print per-class precision/recall/F1 for the same 733 evaluated samples
# (class 0 = 'No', class 1 = 'Yes').
print("Classification Report:")
print(classification_report(y_test_binary[:733], y_pred_binary))

Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.95      0.97       571
           1       0.84      0.97      0.90       162

    accuracy                           0.95       733
   macro avg       0.92      0.96      0.94       733
weighted avg       0.96      0.95      0.95       733



In [344]:
# Hand-picked product titles used as a spot check of the classifier
product_titles = [
    "Croc Textured Two Fold Wallet",
    "Men Striped Sliders",
    "Printed A-line Kurta",
    "Girls Floral Printed T-shirt",
    "Women Printed Kurta with Skirt",
    "Floral Cotton Maxi Dress",
    "Fitted Crop Top",
    "Men Mesh Running Shoes",
    "Men Checked Pullover",
    "Girls Cotton Jersey Top",
    "Ethnic Motifs Kaftan Dress",
    "Leather Wedge Sandals with Tassels",
    "Leather Wedge Pumps",
    "V015 Professional Hair Trimmer",
    "Women Open Toe Flats"
]

# Print each title followed by its predicted label and the label's score
for title in product_titles:
    y_pred, prediction = predict(title, tokenizer=tokenizer, model=model, labels_legend_inverted=labels_legend_inverted)
    print(title)
    print(y_pred, prediction)
    print()


Croc Textured Two Fold Wallet
No {'No': 0.7121512293815613}

Men Striped Sliders
Yes {'Yes': 0.9679132699966431}

Printed A-line Kurta
No {'No': 0.5195634365081787}

Girls Floral Printed T-shirt
Yes {'Yes': 0.5957861542701721}

Women Printed Kurta with Skirt
Yes {'Yes': 0.7515829205513}

Floral Cotton Maxi Dress
Yes {'Yes': 0.8700396418571472}

Fitted Crop Top
Yes {'Yes': 0.962540864944458}

Men Mesh Running Shoes
Yes {'Yes': 0.9768970608711243}

Men Checked Pullover
Yes {'Yes': 0.9679132699966431}

Girls Cotton Jersey Top
Yes {'Yes': 0.9314157962799072}

Ethnic Motifs Kaftan Dress
Yes {'Yes': 0.8991736769676208}

Leather Wedge Sandals with Tassels
Yes {'Yes': 0.8526319861412048}

Leather Wedge Pumps
Yes {'Yes': 0.6943241953849792}

V015 Professional Hair Trimmer
Yes {'Yes': 0.9789875149726868}

Women Open Toe Flats
Yes {'Yes': 0.5097216367721558}



In [345]:
# Second hand-picked batch of product titles for a spot check.
# Note that this rebinds product_titles from the previous cell.
product_titles = [
    "LIFEWORTH Private Label Extra Strength Health Herbal Supplements Tongkat Ali Extract For Men With Maca",
    "Private Label NMN Supplement Anti Aging Purity Capsule",
    "Dearjoyee sexy dolls for men silicon xxx-18-www-sexy-girl-doll-video/of real vagina and anus/porno sexi with dolls",
    "10 Modes Clitoral Stimulator Female Masturbator Bullet Vibrator Sex Toys Makeup Brush Vibrator",
    "The New Rechargeable Vibrator Wand - 12 Patterns G Spot Vibrators Clit Vibrator Sex Toys Personal Wand Massager",
    "Quaige 3 in 1 mini muneca del sexo otros productos sexuales munecas de plastico para adultos boneco sexual",
    "Wholesale Cheap Price Sex Products Adult Toy Female Clitoris Vibrator Silicone G Spot Rabbit Vibrator Sex Toy For Women",
    "Strong Sleeping Pills 1000 Mg Melatonin Capsule Powder Extract Sleep Improving Private Label",
    "US warehouse deliver easy use mens sex toys vagina pussy dolls excellent quality sex dolls big butt sex toy",
    "Wholesale Price Veterinary Weight Gain Supplements For Animals without side effects and maximum efficiency"

]

# Print each title followed by its predicted label and the label's score
for title in product_titles:
    y_pred, prediction = predict(title, tokenizer=tokenizer, model=model, labels_legend_inverted=labels_legend_inverted)
    print(title)
    print(y_pred, prediction)
    print()

LIFEWORTH Private Label Extra Strength Health Herbal Supplements Tongkat Ali Extract For Men With Maca
Yes {'Yes': 0.993017315864563}

Private Label NMN Supplement Anti Aging Purity Capsule
No {'No': 0.9660723209381104}

Dearjoyee sexy dolls for men silicon xxx-18-www-sexy-girl-doll-video/of real vagina and anus/porno sexi with dolls
No {'No': 0.9999964237213135}

10 Modes Clitoral Stimulator Female Masturbator Bullet Vibrator Sex Toys Makeup Brush Vibrator
No {'No': 0.9999982118606567}

The New Rechargeable Vibrator Wand - 12 Patterns G Spot Vibrators Clit Vibrator Sex Toys Personal Wand Massager
No {'No': 0.9999971389770508}

Quaige 3 in 1 mini muneca del sexo otros productos sexuales munecas de plastico para adultos boneco sexual
No {'No': 0.93863445520401}

Wholesale Cheap Price Sex Products Adult Toy Female Clitoris Vibrator Silicone G Spot Rabbit Vibrator Sex Toy For Women
No {'No': 0.9999967813491821}

Strong Sleeping Pills 1000 Mg Melatonin Capsule Powder Extract Sleep Improvin

In [346]:
#no for v2,v3,
#yes for v1,v4


In [354]:
import pandas as pd

# Re-read the raw file. This rebinds `data` to the unfiltered frame (no
# header-row or duplicate removal), which the following cells rely on.
data = pd.read_csv('data/data.txt')
product_titles = data.tail(200)["Name of Product"].tolist()

# Tally how many of the last 200 titles the model labels "No" versus
# anything else. The per-title debug prints that used to sit in the loop as a
# no-op string literal have been removed.
yes = 0
no = 0
for title in product_titles:
    y_pred, prediction = predict(title, tokenizer=tokenizer, model=model, labels_legend_inverted=labels_legend_inverted)
    if y_pred == "No":
        no += 1
    else:
        yes += 1
        

In [355]:
# Report the predicted-label tallies for the last 200 rows of the data file
print("yes : ",yes)
print("no : ",no)

yes :  6
no :  194


In [None]:
# Ground-truth label counts for the same last 200 rows, for comparison with
# the predicted yes/no tallies printed above.
df=data.tail(200)
print('the data with No in Is Accepted Policy',len(df[df["Is Accepted Policy"]=="No"]))
print('the data with Yes in Is Accepted Policy',len(df[df["Is Accepted Policy"]=="Yes"]))