### Take in the privacy stories from an application and use the LLM to determine appropriate solutions. 



In [1]:
import pandas as pd

from conf import data_dir

#################################################
# Manually gathered privacy stories for example #
#################################################


privacy_stories = [
    """1. **We collect Personal Data and Usage Data** to enhance operational efficiency and user experience.
   - **Purpose:** To provide and maintain the service, including monitoring the usage of our service. This assists in improving service functionalities and ensuring a smoother user experience.""",

    """2. **We use Personal Data** for account management and service delivery.
   - **Purpose:** To manage user registrations and provide access to various functionalities of the service that are available to registered users. This data is also used to perform contracts by ensuring the delivery of products, items, or services as agreed.""",

    """3. **We use Personal Data** for communication purposes.
   - **Purpose:** To contact users via email, telephone calls, SMS, or other electronic communications regarding updates, security information, or promotional communications related to functionalities, products, or contracted services.""",

    """4. **We use Personal Data** for promotional and analytical purposes.
   - **Purpose:** To provide users with news, special offers, and general information about similar goods, services, and events which might interest them, and also for data analysis to identify usage trends and evaluate the effectiveness of promotional campaigns.""",

    """5. **We share Personal Data with Service Providers and Affiliates** to enhance service delivery.
   - **Purpose:** To monitor and analyze the usage of our service and to maintain service standards across different entities within our corporate family, ensuring consistent adherence to our privacy policy.""",

    """6. **We share Personal Data** in the context of business transactions.
   - **Purpose:** In the event of mergers, sales of company assets, or acquisition, personal data may be transferred as part of the assets, facilitating business continuity or integration into new corporate structures.""",

    """1. **We collect Personal Data** (such as Usage Data including IP addresses, browser types, visit timings, and other diagnostic data) **for the purpose of Functionality** to provide and maintain our service, manage user accounts, and optimize service delivery.""",

    """2. **We use Personal Data** to **manage your account** for the purpose of Functionality, enabling access to different functionalities of the service for registered users.""",

    """3. **We contact You** using Personal Data (via email, telephone calls, SMS, or other forms of electronic communication) **for the purpose of Contact**, to provide updates and communications about functionalities, products, or contracted services, and to inform about necessary security updates.""",

    """4. **We share Personal Data with Service Providers** for the purpose of Analytics and Functionality, to monitor and analyze service usage and assist in the operation of our service.""",

    """5. **We use Personal Data** for **the purpose of Analytics**, to perform data analysis, identify usage trends, determine the effectiveness of promotional campaigns, and evaluate and improve our service, products, services, and marketing strategies.""",

    """6. **We use Personal Data** for business transfers such as mergers or acquisitions **for the purpose of Analytics**, considering it as among the assets transferred.""",

    """7. **We use Personal Data** to **provide you with news, special offers, and information** about goods, services, or events similar to those you have already purchased or inquired about **for the purpose of Advertisement**.""",

    """8. **We share Personal Data with Affiliates** who are required to honor this Privacy Policy **for the purpose of Functionality**, which supports integrated and efficient service offerings.""",

    """1. **We share Personal Data with service providers** for the purpose of analytics and internal analysis.""",

    """2. **We share Personal Data with affiliates** to ensure adherence to the same privacy policy across our controlled entities.""",

    """3. **We share Personal Data with business partners** to offer specific products, services, or promotions.""",

    """4. **We share Personal Data publicly when users interact in public areas** allowing it to be viewed or distributed outside for the purpose of public user interaction.""",

    """5. **We share Personal Data with explicit consent for any other purpose** where user consent has been explicitly given, ensuring respect for user's control over their information.""",

    """6. **We retain Personal Data only as long as necessary** for the purpose of policy compliance, to comply with our legal obligations, to resolve disputes, and to enforce our legal agreements and policies.""",

    """7. **We retain Usage Data for internal analysis** but generally for a shorter period unless required for security enhancements or to improve functionality, or when legally obligated."""
]

stories_list = "\n".join(privacy_stories)

"""
Gather stories from llm output files
"""

chosen_file = 'only_privacy_stories_1-5.csv'
df = pd.read_csv(f"{data_dir}/{chosen_file}")



In [3]:
import json
import os 

from conf import data_dir

# Read the JSON file
with open(f'{data_dir}/privacy_patterns.json', 'r') as f:
    data = json.load(f)

# Initialize an empty list to store the problems
problems_list = []

# Iterate over each item in the JSON data and extract the context and problem entries
for pattern_url, item in data.items():
    context = item.get('context', '')
    problem = item.get('problem', '')
    pattern_name = pattern_url.split('/')[-1]  # Extract the pattern name from the URL
    problems_list.append({
        'pattern': pattern_name,
        'context': context,
        'problem': problem
    })

with open(f'{data_dir}/pbd_text/pbd_wiki.txt', encoding='latin-1') as f:
    pbd_wiki = f.read()

pbd_booklet = {}
for file_name in os.listdir(f'{data_dir}/pbd_text/pbd_booklet'):
    with open(f'{data_dir}/pbd_text/pbd_booklet/{file_name}', encoding='latin-1') as f:
        pbd_booklet[file_name] = f.read()

print(pbd_booklet)



{'abstract.txt': '5 Abstract\nLimit as much as possible the detail in which personal data\nis processed.\nWhile â\x80\x98minimiseâ\x80\x99 forces one to decide whether or not to process a particular piece of personal data, â\x80\x98abstractâ\x80\x99 addresses the more subtle question\nof the level of detail in which to process personal data. The less detailed\na personal data item is, the more we â\x80\x98zoom outâ\x80\x99, the lower the privacy\nrisk is.\n5.1 Tactics\nProcessing personal data in less detail can be done both at the data subject\nlevel as well as the attribute level. The following three tactics apply.\nSummarise Summarise detailed attributes into more coarse-grained, general attributes. For example, use an age category instead of a birth\ndate, or a city of residence instead of a full address, whenever possible.\nGroup Aggregate information about a group of people instead of processing personal information for each person in the group separately. Compile group profiles 

In [4]:
# Generate initialization string 
build_string = "You are a helpful AI assistant"
# Generate the prompt
prompt = ( 
    f"Now read through this list of stories:\n\n{stories_list}\n\n"
    "Identify which of the following strategies the system described"
    "by these stories which define there practices of private data, "
    "could be adapted to improve there system with explanation. At the"
    "end write a list of what of the 9 strategies are most needed in json"
)

prompt += pbd_wiki

def add_pbd_stuff(pbd_stuff, prompt):
    for file_name, content in pbd_stuff.items():
        prompt += content
    return prompt


# Add each problem entry to the prompt
def add_ppatterns(prompt):
    prompt += "Identify which of the following patterns are related to the privacy stories:\n"
    for problem in problems_list:
        prompt += f"Pattern : ['{problem['pattern']}'] - Context: {problem['context']} : Problem: {problem['problem']}\n"
    # Add output instructions 
    prompt += (
        "Provide your answers in a list of patterns with the format: {pattern} is related with these {selected stories related to that pattern} because"
    )
    return prompt



print(prompt)

Now read through this list of stories:

1. **We collect Personal Data and Usage Data** to enhance operational efficiency and user experience.
   - **Purpose:** To provide and maintain the service, including monitoring the usage of our service. This assists in improving service functionalities and ensuring a smoother user experience.
2. **We use Personal Data** for account management and service delivery.
   - **Purpose:** To manage user registrations and provide access to various functionalities of the service that are available to registered users. This data is also used to perform contracts by ensuring the delivery of products, items, or services as agreed.
3. **We use Personal Data** for communication purposes.
   - **Purpose:** To contact users via email, telephone calls, SMS, or other electronic communications regarding updates, security information, or promotional communications related to functionalities, products, or contracted services.
4. **We use Personal Data** for promotio

In [9]:


import openai
from openai import OpenAI
import os
import json
import time
import pandas as pd 

from langchain_community.llms import Ollama

phi3 = Ollama(model="phi3")

from utils.secrets import OPENAI_API_KEY
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
client = OpenAI()


marker = time
output_dir = 'output'

models = ["gpt-4-turbo-2024-04-09","gpt-4-0125-preview","gpt-4-0125-preview","gpt-4-turbo-preview",
          "gpt-4-1106-preview","gpt-4-32k","gpt-3.5-turbo-0125","gpt-3.5-turbo-1106",]

# Set the output directory
output_dir = 'output'

# Set the model name
model_name = "gpt-4-turbo-2024-04-09"

'''
# Prompt the model
response = client.chat.completions.create(
                model=model_name,
                messages=[
                    {"role": "system", "content": build_string},
                    {"role": "user", "content": prompt}
                ],
                max_tokens= 1000
            )
            
# Print the completion text
response = response.choices[0].message.content.strip()
'''

response = phi3.invoke(prompt)
print(response)


As we delve into enhancing user privacy in information systems through strategic design approaches, let's expand on each Noonymization Strategy with examples and considerations:


**Strategy #2: Separate - Data Segregation**
Separating the processing or storage of personal data can be achieved by creating distinct databases for different types of user information. For instance, an online retailer could store purchase history separately from a customer's contact details and preferences. This separation ensures that even if one database is compromised, it doesn't reveal complete profiles of individuals.


**Strategy #3: Abstract - Aggregated Data Analysis**
Aggregating personal data can reduce privacy risks by summarizing information at a group level. For example, smart city applications might report traffic patterns per district rather than individual cars to protect anonymity while providing valuable insights for urban planning.


**Strategy #4: Hide - Encryption and Anonymization Tech

In [7]:
import os
import shutil

source_dir = '../policies_descriptions'
destination_dir = 'input/policies_descriptions'

# Create the destination directory if it doesn't exist
os.makedirs(destination_dir, exist_ok=True)

# Function to sanitize file names
def sanitize_file_name(file_name):
    valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
    return ''.join(c if c in valid_chars else '_' for c in file_name)

# Recursively traverse all directories within source_dir
for root, dirs, files in os.walk(source_dir):
    for file in files:
        # Check if the file ends with privacy_policy.txt
        if file.endswith('privacy_policy.txt'):
            # Construct the source and destination paths
            source_path = os.path.join(root, file)
            destination_path = os.path.join(destination_dir, sanitize_file_name(file))
            
            # Check if the source file exists
            if os.path.exists(source_path):
                # Copy the file to the destination directory
                shutil.copy2(source_path, destination_path)
            else:
                print(f"File not found: {source_path}")


File not found: ../policies_descriptions\appinventor.ai_ff_diamonds_fff_diamond_fire_app.Cashout_Make_Smart_Money_Apps_Free_Mobile_Android_2023\appinventor.ai_ff_diamonds_fff_diamond_fire_app.Cashout_Make_Smart_Money_Apps_Free_Mobile_Android_2023_privacy_policy.txt
File not found: ../policies_descriptions\com.coloring_asmr_draw_alphabet.tatto_sexy_adult_paint_by_numbers_coloring_pages\com.coloring_asmr_draw_alphabet.tatto_sexy_adult_paint_by_numbers_coloring_pages_privacy_policy.txt
File not found: ../policies_descriptions\com.gameballvalley.play.win.make.earn.real.money.rewards.cash.gift.cards.survey.paid\com.gameballvalley.play.win.make.earn.real.money.rewards.cash.gift.cards.survey.paid_privacy_policy.txt
File not found: ../policies_descriptions\com.gamelife.life.rewards.games.play.win.money.free.cash.gift.cards.paid.survey.lucky\com.gamelife.life.rewards.games.play.win.money.free.cash.gift.cards.paid.survey.lucky_privacy_policy.txt
File not found: ../policies_descriptions\com.hista