In [4]:
from transformers import XLNetTokenizer ,XLNetForSequenceClassification

from transformers import BertTokenizer, BertForSequenceClassification
from torch.nn.functional import softmax
import torch
import re
import json

# Filesystem locations of the two fine-tuned classifiers. Raw strings keep the
# Windows backslashes literal.
bert_model_path = r"I:\CoDarks\public\models\DPBH_BERT_Fine_Tuned_Model"
xlnet_model_path = r"I:\CoDarks\public\models\DPBH_XLNet_Fine_Tuned_Model"

# Load each tokenizer/model pair from its path. This runs at module import
# time, so importing this file requires both paths to be readable.
bert_tokenizer = BertTokenizer.from_pretrained(bert_model_path)
bert_model = BertForSequenceClassification.from_pretrained(bert_model_path)

xlnet_tokenizer = XLNetTokenizer.from_pretrained(xlnet_model_path)
xlnet_model = XLNetForSequenceClassification.from_pretrained(xlnet_model_path)

# Passed as ``max_length`` (with ``truncation=True``) to every
# ``tokenizer.encode`` call in this file.
max_seq_length = 512

def preprocess_text(tokenizer, text):
    """Round-trip ``text`` through ``tokenizer`` and return the re-tokenized result.

    The text is encoded to ids (special tokens requested, truncated to the
    module-level ``max_seq_length``), decoded back into a string, and that
    string is then tokenized.

    Args:
        tokenizer: Object exposing ``encode``, ``decode`` and ``tokenize``.
        text: Raw input string.

    Returns:
        Whatever ``tokenizer.tokenize`` returns for the decoded string.
    """
    token_ids = tokenizer.encode(
        text,
        add_special_tokens=True,
        max_length=max_seq_length,
        truncation=True,
    )
    # decode() is called with its default arguments, exactly as before.
    decoded_text = tokenizer.decode(token_ids)
    return tokenizer.tokenize(decoded_text)

def predict_dark_patterns(models, tokenizers, input_text):
    """Collect one predicted class index per (model, tokenizer) pair.

    Args:
        models: Iterable of callables; each is called with the encoded input
            and must return an object with a ``logits`` attribute.
        tokenizers: Iterable of tokenizers, paired positionally with
            ``models``. ``zip`` stops at the shorter of the two.
        input_text: The text to classify.

    Returns:
        A list of ints, one per pair, each the argmax over that model's
        softmaxed logits.
    """

    def _vote(model, tokenizer):
        # NOTE(review): preprocess_text encodes with add_special_tokens=True
        # and decodes with default arguments, so the token list may already
        # contain special-token strings and this encode call may add them a
        # second time -- confirm this matches how the models were fine-tuned.
        tokens = preprocess_text(tokenizer, input_text)
        input_ids = tokenizer.encode(
            tokens,
            return_tensors='pt',
            max_length=max_seq_length,
            truncation=True,
        )

        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = model(input_ids).logits

        class_probs = softmax(logits, dim=1).squeeze()
        return torch.argmax(class_probs).item()

    return [_vote(model, tokenizer) for model, tokenizer in zip(models, tokenizers)]

def count_dark_patterns(text_file):
    """Classify every non-blank line of a text file and group the flagged ones.

    Each line is passed to ``predict_dark_patterns`` with the module-level
    BERT and XLNet models; the label with the most votes decides the line's
    category. Lines whose category is anything other than
    "Not Dark Pattern" are printed and collected.

    Args:
        text_file: Path of a UTF-8 text file holding one candidate per line.

    Returns:
        A dict mapping each dark-pattern category name to the list of lines
        (unstripped, in file order) assigned to that category. Categories
        with no hits map to an empty list.

    Raises:
        KeyError: If the winning label is not present in the category
            mapping.
    """
    with open(text_file, 'r', encoding='utf-8') as file:
        text_content = file.read()

    # Map category names to numeric labels
    category_mapping = {"Urgency": 0, "Not Dark Pattern": 1, "Scarcity": 2, "Misdirection": 3, "Social Proof": 4,
                        "Obstruction": 5, "Sneaking": 6, "Forced Action": 7}
    # Inverse lookup built once, instead of scanning the mapping per line.
    label_to_category = {label: name for name, label in category_mapping.items()}

    # Key order is kept as-is because it determines the order of the
    # serialized output.
    ans = {
        "Urgency": [],
        "Scarcity": [],
        "Misdirection": [],
        "Obstruction": [],
        "Sneaking": [],
        "Forced Action": [],
        "Social Proof": [],
    }

    for sentence in text_content.split('\n'):
        if not sentence.strip():
            continue

        votes = predict_dark_patterns([bert_model, xlnet_model],
                                      [bert_tokenizer, xlnet_tokenizer],
                                      sentence)

        # Majority vote. Candidates are sorted so that a tie is always
        # resolved in favour of the smallest label, rather than depending on
        # set iteration order (max() returns the first maximal item).
        majority_label = max(sorted(set(votes)), key=votes.count)
        category_name = label_to_category[majority_label]

        if category_name != "Not Dark Pattern":
            ans[category_name].append(sentence)
            print(sentence)

    return ans

# Classify the input text file and write the grouped result out as JSON.
# The paths are raw strings so the Windows backslashes are taken literally;
# the non-raw form relied on invalid escape sequences (e.g. ``\C``, ``\s``,
# ``\d``) that newer Python versions warn about.
result = count_dark_patterns(r'I:\CoDarks\server\output.txt')

json_data = json.dumps(result)
with open(r'I:\CoDarks\server\data.json', 'w') as json_file:
    json_file.write(json_data)

Login
Login
Login
From ₹1,649
From ₹8000
From ₹10190
From ₹5,799
Up to 70% Off
Up to 40% Off
From ₹139
Upto 75% Off
Up to 70% off
Up to 70% Off
Upto 80% Off
Min. 70% Off
Min. 50% Off
Min. 50% Off
Min. 70% Off
ASUS ROG Strix SCAR 16 (2023) Core i9 13th Gen
ASUS Zenbook 14 OLED (2022)
2 in 1 Laptops
No Cost EMI
 Bengaluru, 560103, 
 Bengaluru, 560103, 
 CIN : U51109KA2012PTC066107 
044-45614700
tals NEE Just 215 999
3      Just 15 999
Launch Tomorrow 12PM
Launch Tomorrow 12PM
I  y India s First 32MP Selfie CamTop 50 Deals on ACs  From  26 499 Cartier  Samsung  MarQ  Guaranteed Exchange Offer   Min 24 000
  EEETop 50 Deals on ACs From 726 499 Carrer  Samsung  Mar  Guaranteed Exchange Offer   Min 24 000en
S   Get Upto  25 000 Off
2 Get Upto  25 000 Off
4 Fly to Bliss 
f Akasa Air Flights From 1 299
  ok Flights From 21 299EPSON  tei
tals NEE Just 215 999
3      Just 15 999
Launch Tomorrow 12PM
Launch Tomorrow 12PM
     3   Ye
