In [1]:
# Use the %pip magic rather than a `!pip` shell call so the package is
# installed into the environment of the running kernel.
%pip install mysql-connector-python



In [2]:
import getpass
import os

import mysql.connector as msc

# Connection settings are read from the environment so that credentials are
# not stored in the notebook. Non-secret values fall back to the defaults this
# notebook has always used.
conn = msc.connect(
    host=os.environ.get("IR_POLICY_DB_HOST", "localhost"),
    user=os.environ.get("IR_POLICY_DB_USER", "root"),
    # The password has no default: prompt for it when the variable is unset.
    password=os.environ.get("IR_POLICY_DB_PASSWORD") or getpass.getpass("MySQL password: "),
    database=os.environ.get("IR_POLICY_DB_NAME", "ir_policy"),
)

In [3]:
# Use the %pip magic rather than a `!pip` shell call so the package is
# installed into the environment of the running kernel.
%pip install transformers



In [4]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Tokenizer and seq2seq model are both loaded from the same pretrained checkpoint.
LEGAL_PEGASUS_CHECKPOINT = "nsi319/legal-pegasus"
tokenizer = AutoTokenizer.from_pretrained(LEGAL_PEGASUS_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(LEGAL_PEGASUS_CHECKPOINT)

def legal_text_summarizer(input_text, min_length=400, max_length=1300):
    """
    Summarize a legal text with the module-level ``tokenizer`` and ``model``.

    Parameters:
        input_text (str | None): The legal document text to be summarized.
        min_length (int): Minimum length of the summary, in tokens.
        max_length (int): Maximum length of the summary, in tokens.

    Returns:
        str: The generated summary. An empty string is returned, without
        calling the model, when ``input_text`` is ``None``, empty, or
        whitespace only.

    Raises:
        ValueError: If ``min_length`` is greater than ``max_length``.
    """
    # Validate the bounds up front so a bad call fails before any model work.
    if min_length > max_length:
        raise ValueError(
            f"min_length ({min_length}) must not exceed max_length ({max_length})"
        )

    # Nothing to summarize (e.g. a NULL column read from the database): forcing
    # the model to emit at least `min_length` tokens here would only produce
    # invented text.
    if input_text is None or not input_text.strip():
        return ""

    # Cap the raw text before tokenizing; the tokenizer call below truncates
    # to 1024 tokens in any case.
    input_text = input_text[:55000]

    # A single sequence needs no padding, so none is requested.
    input_tokenized = tokenizer(
        input_text,
        return_tensors='pt',
        truncation=True,
        max_length=1024,
    )

    summary_ids = model.generate(
        input_tokenized['input_ids'],
        # Pass the mask explicitly instead of relying on it being inferred.
        attention_mask=input_tokenized['attention_mask'],
        num_beams=9,
        no_repeat_ngram_size=3,
        length_penalty=2.0,
        min_length=min_length,
        max_length=max_length,
        early_stopping=True
    )

    # Decode the first returned sequence.
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)

    return summary

In [None]:
# Create the table that holds the generated summaries, keyed by app_id
# (one row per app). IF NOT EXISTS makes the cell safe to re-run.
cursor = conn.cursor()
try:
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS generated_data (
        app_id INT PRIMARY KEY,
        short_summary TEXT,
        medium_summary TEXT,
        long_summary TEXT
    );
""")
    conn.commit()
finally:
    # Release the cursor even if the statement fails; the cells that query
    # or write data open their own cursors.
    cursor.close()

In [9]:
# NOTE(review): this entire cell is a string literal, so none of it executes.
# If it is ever re-enabled, two things need fixing first:
#   * `mysql.connector.Error` uses the name `mysql`, but the connection cell
#     imports the package as `msc`;
#   * the `finally` clause closes `conn`, which later cells still use.
'''cursor.execute("SELECT * FROM ir_policy.apps_data")

try:
    while True:
        row = cursor.fetchone()
        if row is None:
            break
        print(row)
        
        # The rest of your logic here
        # ...

except mysql.connector.Error as err:
    print("Error:", err)
finally:
    cursor.close()
    conn.close()
'''

(1, 'Texting', 'Whatsapp', 'Skip to content\nWhatsApp Main Page\n\nFeatures\nPrivacy\nHelp Center\nBlog\nWhatsApp Web\nDownload\nLast modified: January 4, 2021 (archived versions)\n\nWhatsApp Privacy Policy\nIf you live in the European Region, WhatsApp Ireland Limited provides the services to you under this Terms of Service and Privacy Policy.\n\nWhatsApp Legal Info\nIf you live outside the European Region, WhatsApp LLC ("WhatsApp," "our," "we," or "us") provides our Services to you under this Terms of Service and Privacy Policy.\n\nOur Privacy Policy ("Privacy Policy") helps explain our data practices, including the information we process to provide our Services.\n\nFor example, our Privacy Policy talks about what information we collect and how this affects you. It also explains the steps we take to protect your privacy, like building our Services so delivered messages aren’t stored by us and giving you control over who you communicate with on our Services.\n\nWe are one of the Meta C

In [None]:
import csv
import sys

# (min_length, max_length) token bounds for the short, medium and long
# summaries, in the order of the output columns.
SUMMARY_LENGTH_BOUNDS = [(400, 500), (850, 900), (1200, 1300)]

# Policy texts may be longer than csv's default per-field size limit. Raise
# the limit, capped so the value also fits a C long where that is 32 bits.
csv.field_size_limit(min(sys.maxsize, 2**31 - 1))

# newline='' lets the csv module handle line breaks inside quoted fields
# itself, as its documentation requires for both reading and writing.
# NOTE(review): UTF-8 is assumed for output_data.csv — confirm how it was exported.
with open('output_data.csv', mode='r', newline='', encoding='utf-8') as infile, \
        open('results.csv', mode='w', newline='', encoding='utf-8') as outfile:
    reader = csv.reader(infile)
    writer = csv.writer(outfile)
    writer.writerow(['app_id', 'short_summary', 'medium_summary', 'long_summary'])

    next(reader, None)  # skip the first input row (presumably the header)

    for row in reader:
        # The loop reads columns 0 and 3; skip rows too short to have both.
        if len(row) < 4:
            print(f"Skipping row ending at line {reader.line_num}: expected at least 4 columns, got {len(row)}")
            continue

        app_id = row[0]
        privacy_policy = row[3]

        summaries = [
            legal_text_summarizer(privacy_policy, lower, upper)
            for lower, upper in SUMMARY_LENGTH_BOUNDS
        ]

        writer.writerow([app_id, *summaries])
        # Each row is expensive to produce; flush so finished rows reach the
        # file even if a later row fails.
        outfile.flush()

print("Summaries generated and written to results.csv")

In [26]:
# Read the text to summarize from an interactive prompt; the cell blocks
# until a line has been entered.
input_text = input()

In [29]:
# NOTE(review): `preprocess_text` is not defined in any cell visible here —
# confirm where it comes from before relying on Restart & Run All.
preprocessed_text = preprocess_text(input_text)
# Summarize the preprocessed text with min_length=400 and max_length=500 tokens.
short_summary = legal_text_summarizer(preprocessed_text, 400, 500)

In [30]:
# Show the summary generated from the preprocessed text in the previous cell.
print(short_summary)

This privacy policy explains the types of personal information we collect from you through our stores, website and mobile applications. It also describes the purposes for which we collect that personal information, the other parties with whom we may share it and the measures we take to protect the security of your data. You are advised to carefully read this Fortune ,Haldirams ,Ferrero before Ranchiing or availing any of our products and or services. Amritsar includes non personal information and sensitive personal information about you, which either directly or indirectly in combination with other information, could allow you to be identified when you visit our stores or mobile application. You can choose not to receive marketing emails from Ranchi or click on the unsubscribe link in any marketing email. Account details, profile and password details and any frequent flyer or travel partner affiliation affiliation information are also collected when you interact with us on our website 

In [31]:
# Summarize the input text as entered (no preprocessing), using the same
# 400-500 token bounds, then print the result.
unprocessed_summary = legal_text_summarizer(input_text, 400, 500)
print(unprocessed_summary)

This Privacy Notice describes the types of personal information we collect from you through our stores, website (including sub-domains and microsites) and mobile applications. It also describes the purposes for which we collect that personal information, the other parties with whom we may share it and the measures we take to protect the security of your data. This Notice also tells you about your rights and choices with respect to your personal information and how you can contact us about our privacy practices. You are advised to carefully read this Privacy Notice using or availing any of our products and/or services. Data includes non-personal information, personal data and sensitive personal information about you, which either directly or indirectly in combination with other information, could allow you to be identified when you visit our stores or mobile application. This Data includes, without limitation, contact information: first and last name, email address, postal address, coun

In [1]:
# Generate the short summary for every app with app_id >= SHORT_START_APP_ID
# and upsert it into generated_data.
SHORT_START_APP_ID = 37  # first app_id processed by this run

# The shared connection may have been closed by an earlier cell; reopen it.
if not conn.is_connected():
    conn.reconnect()

# Built once: the statement is the same for every row.
insert_sql = """
    INSERT INTO generated_data (app_id, short_summary)
    VALUES (%s, %s)
    ON DUPLICATE KEY UPDATE
        short_summary = VALUES(short_summary);
    """

# Read all source rows first, then release the read cursor before writing.
cursor = conn.cursor()
try:
    # ORDER BY keeps processing in app_id order, so a failed run can be
    # resumed by raising SHORT_START_APP_ID.
    cursor.execute(
        "SELECT * FROM ir_policy.apps_data WHERE app_id >= %s ORDER BY app_id",
        (SHORT_START_APP_ID,),
    )
    rows = cursor.fetchall()
finally:
    cursor.close()

write_cursor = conn.cursor()
try:
    for row in rows:
        app_id = row[0]
        input_text = row[3]  # presumably the privacy-policy text — verify against the table schema

        # Skip apps with no stored text (NULL or empty) instead of crashing.
        if not input_text:
            continue

        short_summary = legal_text_summarizer(input_text, 400, 500)

        write_cursor.execute(insert_sql, (app_id, short_summary))
        # Commit per row so finished summaries survive a later failure.
        conn.commit()
finally:
    write_cursor.close()

# The connection is deliberately left open: the next cell reuses `conn`.

In [2]:
# Generate the medium summary for every app with app_id >= MEDIUM_START_APP_ID
# and upsert it into generated_data.
MEDIUM_START_APP_ID = 14  # first app_id processed by this run

# The shared connection may have been closed by an earlier cell; reopen it.
if not conn.is_connected():
    conn.reconnect()

# Built once: the statement is the same for every row.
insert_sql = """
    INSERT INTO generated_data (app_id, medium_summary)
    VALUES (%s, %s)
    ON DUPLICATE KEY UPDATE
        medium_summary = VALUES(medium_summary);
    """

# Read all source rows first, then release the read cursor before writing.
cursor = conn.cursor()
try:
    # ORDER BY keeps processing in app_id order, so a failed run can be
    # resumed by raising MEDIUM_START_APP_ID.
    cursor.execute(
        "SELECT * FROM ir_policy.apps_data WHERE app_id >= %s ORDER BY app_id",
        (MEDIUM_START_APP_ID,),
    )
    rows = cursor.fetchall()
finally:
    cursor.close()

write_cursor = conn.cursor()
try:
    for row in rows:
        app_id = row[0]
        input_text = row[3]  # presumably the privacy-policy text — verify against the table schema

        # Skip apps with no stored text (NULL or empty) instead of crashing.
        if not input_text:
            continue

        medium_summary = legal_text_summarizer(input_text, 750, 900)

        write_cursor.execute(insert_sql, (app_id, medium_summary))
        # Commit per row so finished summaries survive a later failure.
        conn.commit()
finally:
    write_cursor.close()

# As in the original cell, close the connection once every row is stored.
conn.close()

# Long summaries: generation is currently disabled (see the commented-out cell below).

In [None]:
# cursor = conn.cursor()

# cursor.execute("SELECT * FROM ir_policy.apps_data WHERE app_id >= 14")
# rows = cursor.fetchall()
# for row in rows:
#     app_id = row[0]
#     input_text = row[3]

#     long_summary = legal_text_summarizer(input_text, 1200, 1400)
#     write_cursor = conn.cursor()

#     insert_sql = """
#     INSERT INTO generated_data (app_id, long_summary)
#     VALUES (%s, %s)
#     ON DUPLICATE KEY UPDATE
#         long_summary = VALUES(long_summary);
#     """

#     write_cursor.execute(insert_sql, (app_id, long_summary))
#     conn.commit() 
#     write_cursor.close()

# cursor.close()  
# conn.close()  

In [3]:
# cursor.execute("SELECT * FROM ir_policy.apps_data")
# rows = cursor.fetchall() 

# for row in rows:
#     app_id = row[0]
#     input_text = row[3]

#     short_summary = legal_text_summarizer(input_text, 400, 500)
#     medium_summary = legal_text_summarizer(input_text, 850, 900)
#     long_summary = legal_text_summarizer(input_text, 1200, 1300)

#     # Use a different cursor for the write operations
#     write_cursor = conn.cursor()

#     insert_sql = """
#     INSERT INTO generated_data (app_id, short_summary, medium_summary, long_summary)
#     VALUES (%s, %s, %s, %s)
#     ON DUPLICATE KEY UPDATE
#         short_summary = VALUES(short_summary),
#         medium_summary = VALUES(medium_summary),
#         long_summary = VALUES(long_summary);
#     """
#     # Execute the insertion/update query
#     write_cursor.execute(insert_sql, (app_id, short_summary, medium_summary, long_summary))
#     conn.commit()  # Commit each insert
#     write_cursor.close()

# cursor.close()  # Close the original cursor
# conn.close() 