In [2]:
import os
import pandas as pd

DATA_DIR = "../data"  

def load_policies(data_dir):
    policy_records = []
    
    for filename in os.listdir(data_dir):
        if filename.endswith('.txt'):
            platform_name = filename.replace('.txt', '')
            filepath = os.path.join(data_dir, filename)
            
            with open(filepath, 'r', encoding='utf-8') as file:
                policy_content = file.read().strip()
            
            policy_records.append({
                "Platform": platform_name,
                "Policy": policy_content
            })
    
    return pd.DataFrame(policy_records)

# Load the policies into df
policy_df = load_policies(DATA_DIR)

print(f"Number of policies loaded: {len(policy_df)}")
policy_df.head()


Number of policies loaded: 54


Unnamed: 0,Platform,Policy
0,Signal,Signal >> Terms of Service & Privacy Policy ...
1,Supernova,Privacy Notice - Supernova\n\n\n\n\n\n\n\n\n\n...
2,TruthSocial,Help Center\nTruth +\nFAQ\nDMs\nVerification\n...
3,SilverSingles,Privacy\nINTRODUCTION\n\nWelcome to the Silver...
4,Tinder,Privacy Policy | Tinder | Match. Chat. Meet. M...


In [3]:
import google.generativeai as genai
genai.configure(api_key='AIzaSyBeZUBCPlKrAKwsm6UoAp4Kx-s_rckKgxA')
model = genai.GenerativeModel('gemini-1.5-flash-8b')

def compare_policies_gemini(platform_a, platform_b, df):

    policy_a = df[df['Platform'] == platform_a]['Policy'].iloc[0]
    policy_b = df[df['Platform'] == platform_b]['Policy'].iloc[0]
    
    prompt = f"""
    You are a helpful assistant specialized in analyzing privacy policies.

    Compare the privacy policies of "{platform_a}" and "{platform_b}". Present the differences clearly in a concise tabular format.

    The table should have these columns:
    | Privacy Aspect | {platform_a} | {platform_b} |

    Clearly include these privacy aspects:
    - Data Collection
    - Data Sharing
    - User Rights
    - Cookies
    - Third-party Data
    - Data Retention
    - Security Measures

    Privacy policy of {platform_a}:
    {policy_a}

    Privacy policy of {platform_b}:
    {policy_b}
    """
    
    response = model.generate_content(prompt)
    
    return response.text

In [4]:
# Test
comparison_result = compare_policies_gemini("Facebook", "LinkedIn", policy_df)
print(comparison_result)

Analyzing the provided privacy policies, here's a comparison table highlighting the key differences between Facebook (Meta) and LinkedIn:

| Privacy Aspect | Facebook (Meta) | LinkedIn |
|---|---|---|
| **Data Collection** | Extremely broad, encompassing everything from user-provided information (including sensitive data) to activity logs, device data (even when location services are off), information from partners (including third-party websites), and inferred data.  Collects data from account holders and non-account holders.  Explicitly mentions religious views, sexual orientation, political views, health data, and other potentially sensitive information. | Focuses on professional data. Collects information directly from users (name, contact, work history, skills, etc.), data from others' profiles, activity logs, and inferred data.  Collects data from partners and other sources, but emphasizes professional connections and employment information. |
| **Data Sharing** | Shares data acr