In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Flatten the directory walk into a lazy stream of full file paths, then echo
# each path on its own line in the order os.walk discovers it.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Loading the dataset

import pandas as pd
import random

# Read the competition topics from the read-only Kaggle input directory.
test_csv_path = "/kaggle/input/llms-you-cant-please-them-all/test.csv"
test_data = pd.read_csv(test_csv_path)

# Show the first rows under a heading as a quick sanity check.
print("Test Data Sample:", test_data.head(), sep="\n")


In [None]:
#Function for Essay Generation 
def generate_essay(topic):
    """
    Generate a three-paragraph essay for a topic from randomized templates.

    The essay consists of an introduction, a two-sided body sentence and a
    conclusion, joined by blank lines. Each part is drawn with
    ``random.choice``, so the result depends on the state of the global
    ``random`` generator.

    Args:
        topic: Topic text to embed in the essay. It is inserted verbatim and
            may contain ``{`` or ``}`` characters.

    Returns:
        str: The introduction, body and conclusion separated by blank lines.
    """
    # Random introduction
    introductions = [
        f"The topic '{topic}' invites a broad spectrum of interpretations.",
        f"Exploring '{topic}' brings to light diverse and thought-provoking perspectives.",
        f"'{topic}' is a theme that challenges conventional thinking and sparks debate."
    ]

    # Random body content. These are plain strings (not f-strings) with named
    # placeholders: the topic is passed to str.format as a value, so braces
    # inside the topic are never parsed as replacement fields.
    body_templates = [
        "On one hand, it can be argued that {first}. On the other, some may contend that {second}.",
        # "lies in the fact that" keeps the sentence grammatical, because every
        # filler below is a full clause ("it fosters ...").
        "The significance of '{topic}' lies in the fact that {first}. However, opposing views suggest that {second}.",
        "Proponents of '{topic}' argue that {first}. Yet, critics counter this by asserting {second}."
    ]
    # Each filler is a (supporting clause, opposing clause) pair.
    body_fillers = [
        ("it fosters innovation and progress", "it risks neglecting cultural traditions"),
        ("it empowers individuals to think freely", "it creates challenges for societal cohesion"),
        ("it promotes equality and fairness", "it can introduce unforeseen complexities")
    ]

    # Random conclusion
    conclusions = [
        f"In conclusion, '{topic}' demonstrates the intricate balance of progress and tradition.",
        f"Ultimately, discussing '{topic}' sheds light on the complexity of modern challenges.",
        f"Debating '{topic}' highlights the diversity of perspectives in our global society."
    ]

    # Assemble the essay (draw order: introduction, template, filler, conclusion)
    introduction = random.choice(introductions)
    template = random.choice(body_templates)
    first, second = random.choice(body_fillers)
    body = template.format(topic=topic, first=first, second=second)
    conclusion = random.choice(conclusions)

    return f"{introduction}\n\n{body}\n\n{conclusion}"


In [None]:
#Generate Essays for All Topics

# Build one essay per topic, in row order, and store it in a new 'essay' column.
test_data['essay'] = test_data['topic'].map(generate_essay)

# Show the first id/essay pairs under a heading.
essay_preview = test_data[['id', 'essay']].head()
print("Generated Essays Sample:", essay_preview, sep="\n")


In [None]:
# Save the generated essays 

# Keep only the two columns written to the submission file, then persist them
# without the DataFrame index.
submission_columns = ['id', 'essay']
submission = test_data.loc[:, submission_columns]
submission.to_csv("submission.csv", index=False)

print("Submission file 'submission.csv' created successfully!")


In [None]:
# Read the file back from disk and show its first rows under a heading, to
# confirm what was actually written.
submission_preview = pd.read_csv("submission.csv")
print("Submission File Preview:", submission_preview.head(), sep="\n")


# Better version: adds advanced language-generation techniques, optimization for the metrics in the scoring function, experiments with adversarial techniques, local simulation and validation, topic-specific knowledge, multiple essays per topic, and collaboration and feedback.

In [None]:
import pandas as pd
import random
import numpy as np

# Read the competition topics from the read-only Kaggle input directory.
test_csv_path = "/kaggle/input/llms-you-cant-please-them-all/test.csv"
test_data = pd.read_csv(test_csv_path)

# Show the first rows under a heading as a quick sanity check.
print("Test Data Sample:", test_data.head(), sep="\n")


In [None]:
#Advanced language generation

def generate_essay_advanced(topic):
    """
    Generate a short two-part essay: a contradictory framing plus an unusual format.

    The first paragraph is one of two "contradictory viewpoint" sentences and
    the second is either a two-speaker debate or an "alien civilization"
    sentence. All picks use ``random.choice``, so the result depends on the
    state of the global ``random`` generator.

    Args:
        topic: Topic text embedded verbatim in the essay.

    Returns:
        str: The two paragraphs separated by a blank line.
    """
    # Contradictory viewpoints
    effect = random.choice(['enhance', 'deter'])
    contradictions = [
        f"While '{topic}' is often celebrated for its potential to {effect} society, skeptics argue otherwise.",
        f"Some view '{topic}' as a beacon of progress, while others consider it a Pandora's box of challenges.",
    ]

    # Draw one side and derive the other, so the two speakers always take
    # opposing positions (independent draws could yield 'Pro' vs 'Pro').
    first_side = random.choice(['Pro', 'Con'])
    second_side = 'Con' if first_side == 'Pro' else 'Pro'

    # The article is chosen together with the noun: "a threat" / "an opportunity".
    alien_view = random.choice(['a threat', 'an opportunity'])

    # Uncommon formats (e.g., debate style)
    formats = [
        f"In the context of '{topic}', let us imagine a debate:\n- Speaker 1: '{first_side}'\n- Speaker 2: '{second_side}'",
        f"An alien civilization might interpret '{topic}' as {alien_view}.",
    ]

    # Random selection and assembly
    essay = f"{random.choice(contradictions)}\n\n{random.choice(formats)}"
    return essay


In [None]:
#Optimize for Metrics

def optimize_essay(topic):
    """
    Build a three-paragraph essay: a random opener, a potential-versus-risk
    body sentence, and a fixed closing sentence.

    Args:
        topic: Topic text embedded verbatim in every paragraph.

    Returns:
        str: The three paragraphs separated by blank lines.
    """
    # Draws happen in this order: body upside, body risk, then the opener.
    upside = random.choice(['empowering innovation', 'raising ethical dilemmas'])
    risk = random.choice(['moral ambiguity', 'unintended societal consequences'])
    opener = random.choice([
        f"The topic '{topic}' sparks curiosity and debate.",
        f"'{topic}' challenges us to consider new paradigms.",
    ])

    paragraphs = (
        opener,
        f"The potential of '{topic}' lies in {upside}. However, it is not without risks, such as {risk}.",
        f"Ultimately, '{topic}' requires balanced perspectives to navigate its complexities.",
    )
    return "\n\n".join(paragraphs)


In [None]:
#Experiment with Adversarial Techniques

def generate_adversarial_essay(topic):
    """
    Build a two-paragraph essay: an open question about the topic followed by
    a figurative remark on it.

    Args:
        topic: Topic text embedded verbatim in both paragraphs.

    Returns:
        str: The question and the remark separated by a blank line.
    """
    # The question is drawn first, then the stylistic remark.
    question = random.choice([
        f"Is '{topic}' a tool for liberation or a means of control?",
        f"Can '{topic}' harmonize with tradition, or will it erase it?",
    ])
    remark = random.choice([
        f"Asking this question feels like treading on thin ice: '{topic}' is a double-edged sword.",
        f"'The pen is mightier than the sword,' yet '{topic}' may blur this metaphor forever.",
    ])
    return "\n\n".join((question, remark))


In [None]:
#Local Simulation and Validation

def mock_score(essay):
    """
    Return four random placeholder metrics standing in for a real scorer.

    The essay text is not inspected; every value is an independent draw from
    ``np.random.uniform``.

    Args:
        essay: Essay text (unused).

    Returns:
        tuple: ``(avg_q, avg_e, avg_s, variance)`` drawn from the ranges
        4-9, 0.8-1, 0.2-1 and 0-2 respectively.
    """
    # (low, high) bounds in return order: quality score, English confidence,
    # sequence similarity, variance.
    metric_bounds = ((4, 9), (0.8, 1), (0.2, 1), (0, 2))
    return tuple(np.random.uniform(low, high) for low, high in metric_bounds)

# Smoke-test the mock scorer on one hand-written sentence. The printed values
# are random draws: mock_score does not inspect the essay text.
sample_essay = "Exploring AI and its implications is a topic of ongoing debate."
mock_scores = mock_score(sample_essay)
print("Mock Scores:", mock_scores)


In [None]:
#Generate Multiple Essays per Topic

def generate_multiple_essays(topic, num_essays=3, generators=None):
    """
    Generate several essays for one topic, each from a randomly chosen strategy.

    Args:
        topic: Topic text passed to the chosen generator.
        num_essays: Number of essays to produce. Zero or a negative number
            yields an empty list.
        generators: Optional iterable of callables taking the topic and
            returning an essay. Defaults to this notebook's three strategies:
            ``generate_essay_advanced``, ``optimize_essay`` and
            ``generate_adversarial_essay``.

    Returns:
        list: ``num_essays`` essays, in generation order.

    Raises:
        IndexError: If ``generators`` is given but empty (from ``random.choice``).
    """
    if generators is None:
        # Looked up at call time, so the defaults only need to exist by then.
        generators = (generate_essay_advanced, optimize_essay, generate_adversarial_essay)
    else:
        # random.choice needs an indexable sequence; accept any iterable.
        generators = tuple(generators)

    # Choose the strategy first and run only that one. Building an essay with
    # every strategy and then discarding all but one does the work several
    # times over for the same uniform pick.
    return [random.choice(generators)(topic) for _ in range(num_essays)]


In [None]:
#Final Assembly and Submission

# Build one submission row per topic, keeping the candidate essay whose mock
# variance is highest (the earliest candidate wins a tie).
submission_data = []

for essay_id, topic in zip(test_data['id'], test_data['topic']):
    candidates = generate_multiple_essays(topic)

    # Score every candidate once, in generation order; index 3 of the tuple
    # returned by mock_score is the simulated variance.
    variances = [mock_score(candidate)[3] for candidate in candidates]
    winner = candidates[variances.index(max(variances))]

    submission_data.append({'id': essay_id, 'essay': winner})

# Turn the collected rows into a DataFrame and write it without the index.
submission = pd.DataFrame(submission_data)
submission.to_csv("submission.csv", index=False)
print("Submission file 'submission.csv' created successfully!")


In [None]:
#Visualization

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Simulate performance data for visualization
def simulate_performance_data(num_samples=100):
    """
    Create a DataFrame of random placeholder scoring metrics.

    Args:
        num_samples: Number of rows (simulated essays) to generate.

    Returns:
        pandas.DataFrame: Columns ``essay_id`` (1..num_samples), ``avg_q``,
        ``avg_e``, ``avg_s`` and ``avg_variance``, each metric drawn
        uniformly from its own range.
    """
    # (column, low, high) in draw order: quality score, English confidence,
    # sequence similarity, variance.
    metric_ranges = (
        ('avg_q', 4, 9),
        ('avg_e', 0.8, 1),
        ('avg_s', 0.2, 1),
        ('avg_variance', 0, 2),
    )
    columns = {'essay_id': range(1, num_samples + 1)}
    for name, low, high in metric_ranges:
        columns[name] = np.random.uniform(low, high, num_samples)
    return pd.DataFrame(columns)

# Simulate metrics for 100 essays; every plot below reads this frame.
performance_data = simulate_performance_data(100)

# Show the first rows under a heading.
print("Performance Data Sample:", performance_data.head(), sep="\n")
def plot_distributions(data):
    """
    Draw a 2x2 grid of histograms with KDE overlays, one panel per metric.

    Args:
        data: DataFrame with ``avg_q``, ``avg_e``, ``avg_s`` and
            ``avg_variance`` columns.
    """
    plt.figure(figsize=(16, 10))
    for position, metric in enumerate(('avg_q', 'avg_e', 'avg_s', 'avg_variance'), start=1):
        # Create the panel, then draw and label it through its own Axes handle.
        panel = plt.subplot(2, 2, position)
        sns.histplot(data[metric], kde=True, bins=20, color='skyblue', ax=panel)
        panel.set_title(f"Distribution of {metric}")
        panel.set_xlabel(metric)
        panel.set_ylabel("Frequency")
    plt.tight_layout()
    plt.show()

# Plot distributions. Note: visualize_performance(performance_data), called at
# the end of this cell, draws this same figure a second time.
plot_distributions(performance_data)
def plot_correlation_heatmap(data):
    """
    Draw an annotated heatmap of the pairwise correlations between the metrics.

    Args:
        data: DataFrame with ``avg_q``, ``avg_e``, ``avg_s`` and
            ``avg_variance`` columns.
    """
    # Compute the correlations before opening a figure, so a missing column
    # fails without leaving an empty figure behind.
    metric_columns = ['avg_q', 'avg_e', 'avg_s', 'avg_variance']
    pairwise_corr = data[metric_columns].corr()

    plt.figure(figsize=(8, 6))
    sns.heatmap(pairwise_corr, annot=True, cmap='coolwarm', fmt=".2f")
    plt.title("Correlation Heatmap of Scoring Metrics")
    plt.show()

# Plot correlation heatmap. Note: visualize_performance(performance_data),
# called at the end of this cell, draws this same figure a second time.
plot_correlation_heatmap(performance_data)
def plot_metric_trends(data):
    """
    Plot every metric against the essay id as overlaid line series.

    Args:
        data: DataFrame with ``essay_id``, ``avg_q``, ``avg_e``, ``avg_s``
            and ``avg_variance`` columns.
    """
    plt.figure(figsize=(16, 6))
    trend_axes = plt.gca()
    for metric in ('avg_q', 'avg_e', 'avg_s', 'avg_variance'):
        trend_axes.plot(data['essay_id'], data[metric], label=metric, marker='o', alpha=0.7)
    trend_axes.set_xlabel("Essay ID")
    trend_axes.set_ylabel("Score")
    trend_axes.set_title("Metric Trends Across Essays")
    trend_axes.legend()
    plt.show()

# Plot trends. Note: visualize_performance(performance_data), called at the
# end of this cell, draws this same figure a second time.
plot_metric_trends(performance_data)
def plot_pairwise_relationships(data):
    """
    Draw a corner pair plot of the metrics with KDE curves on the diagonal.

    Args:
        data: DataFrame with ``avg_q``, ``avg_e``, ``avg_s`` and
            ``avg_variance`` columns.
    """
    metric_frame = data[['avg_q', 'avg_e', 'avg_s', 'avg_variance']]
    sns.pairplot(metric_frame, diag_kind='kde', corner=True)
    # y=1.02 places the title just above the top edge of the grid.
    plt.suptitle("Pairwise Relationships of Scoring Metrics", y=1.02)
    plt.show()

# Plot pairwise relationships. Note: visualize_performance(performance_data),
# called at the end of this cell, draws this same figure a second time.
plot_pairwise_relationships(performance_data)
def visualize_performance(data):
    """
    Run all four plotting helpers in sequence, printing a banner before each.

    Args:
        data: DataFrame of simulated metrics, passed unchanged to each helper.
    """
    # (banner, plotting function) pairs in display order.
    stages = (
        ("Visualizing Distributions...", plot_distributions),
        ("Visualizing Correlations...", plot_correlation_heatmap),
        ("Visualizing Metric Trends...", plot_metric_trends),
        ("Visualizing Pairwise Relationships...", plot_pairwise_relationships),
    )
    for banner, render in stages:
        print(banner)
        render(data)

# Visualize all metrics. This re-draws the four figures that the individual
# plot_* calls earlier in this cell have already rendered.
visualize_performance(performance_data)
