In [92]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from fpdf import FPDF
import textwrap

# 1. Data Loading and Cleaning
def load_and_clean_data(file_path):
    """Load the observation CSV and return only the spring and fall records.

    Processing steps:
      1. Read the CSV at ``file_path``.
      2. Replace literal ``'NA'`` strings with NaN, then fill missing values
         in every numeric column with that column's median.
      3. Parse ``Date`` and derive ``Month`` (English three-letter
         abbreviation) and ``Year`` columns.
      4. Keep only rows whose month is Mar-May or Sep-Nov.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file. It must contain a ``Date`` column.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the seasonal rows, including the derived
        ``Month`` and ``Year`` columns.
    """
    # Month labels are spelled out here instead of using strftime('%b'):
    # strftime follows the process locale, and a non-English locale would make
    # the English seasonal filter below silently match nothing.
    month_abbr = {
        1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun',
        7: 'Jul', 8: 'Aug', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec',
    }

    # Read data
    df = pd.read_csv(file_path)

    # Handle missing values
    df = df.replace('NA', np.nan)
    numeric_columns = df.select_dtypes(include=[np.number]).columns
    for col in numeric_columns:
        df[col] = df[col].fillna(df[col].median())

    # Convert dates and create temporal features
    df['Date'] = pd.to_datetime(df['Date'])
    df['Month'] = df['Date'].dt.month.map(month_abbr)
    df['Year'] = df['Date'].dt.year

    # Define and filter seasonal months
    spring_months = ['Mar', 'Apr', 'May']
    fall_months = ['Sep', 'Oct', 'Nov']
    in_season = df['Month'].isin(spring_months + fall_months)

    # Return a copy so later column assignments by callers do not operate on
    # a slice of the full frame (avoids SettingWithCopyWarning).
    return df[in_season].copy()


In [93]:
# 2. Create Monthly Summary
def create_monthly_summary(df):
    """Aggregate observations per month and order the months by season.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Month``, ``Count`` and ``ScientificName`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per month present in ``df`` with the summed ``Count`` and the
        number of distinct ``ScientificName`` values. ``Month`` is an ordered
        categorical (Mar, Apr, May, Sep, Oct, Nov) and rows are sorted by it.
    """
    seasonal_order = ['Mar', 'Apr', 'May', 'Sep', 'Oct', 'Nov']
    aggregations = {'Count': 'sum', 'ScientificName': 'nunique'}

    return (
        df.groupby('Month')
        .agg(aggregations)
        .reset_index()
        # Swap the plain labels for an ordered categorical so that sorting
        # follows the calendar instead of the alphabet.
        .assign(
            Month=lambda frame: pd.Categorical(
                frame['Month'], categories=seasonal_order, ordered=True
            )
        )
        .sort_values('Month')
    )


In [94]:
# 3. Create Pretty Plot
def create_pretty_plot(monthly_summary):
    """Draw the four-panel "pretty" figure and save it to ``pretty_plot.png``.

    Panels:
      * top    - dual-axis line plot of individual counts and species richness;
      * middle - two polar (radar) plots, one for counts, one for richness;
      * bottom - bubble plot where size is species richness and colour is count.

    Parameters
    ----------
    monthly_summary : pandas.DataFrame
        One row per month with columns ``Month``, ``Count`` and
        ``ScientificName`` (plotted as the number of species).

    Returns
    -------
    None
        The figure is written to ``pretty_plot.png`` at 300 dpi and closed.
    """
    months = monthly_summary['Month']
    counts = monthly_summary['Count']
    richness = monthly_summary['ScientificName']

    # Radar geometry. endpoint=False gives every month its own spoke; with the
    # default endpoint=True the first and last month both land on angle
    # 0 == 2*pi and overlap (data points and tick labels alike).
    theta = np.linspace(0, 2 * np.pi, len(monthly_summary), endpoint=False)
    # Closing angle is the first angle plus a full turn; slicing keeps this
    # safe for an empty summary.
    theta_closed = np.concatenate([theta, theta[:1] + 2 * np.pi])

    def _closed(values):
        # Repeat the first value at the end so the outline returns to its start.
        values = np.asarray(values)
        return np.concatenate([values, values[:1]])

    # Create figure with GridSpec for more complex layout
    fig = plt.figure(figsize=(15, 12))
    try:
        gs = fig.add_gridspec(3, 2, height_ratios=[1, 1, 1])

        # 1. Main temporal patterns (top row, spans both columns)
        ax1 = fig.add_subplot(gs[0, :])

        # Dual axis plot for counts and species
        color1, color2 = '#2E8B57', '#000080'

        # Primary axis - Count
        ln1 = ax1.plot(months, counts,
                       color=color1, marker='o', linewidth=2.5, label='Individual Count')
        ax1.fill_between(months, counts, alpha=0.3, color=color1)
        ax1.set_ylabel('Number of Individuals', color=color1, fontsize=12)

        # Secondary axis - Species Richness
        ax1_twin = ax1.twinx()
        ln2 = ax1_twin.plot(months, richness,
                            color=color2, marker='s', linewidth=2.5, linestyle='--',
                            label='Species Richness')
        ax1_twin.set_ylabel('Number of Species', color=color2, fontsize=12)

        # Combine legends from both y-axes into a single box
        lns = ln1 + ln2
        labs = [l.get_label() for l in lns]
        ax1.legend(lns, labs, loc='upper right')

        ax1.set_title('Temporal Patterns of Slug and Snail Observations',
                      pad=20, fontsize=14, fontweight='bold')

        # 2. Seasonal radar plot (middle left)
        ax2 = fig.add_subplot(gs[1, 0], projection='polar')
        r_closed = _closed(counts)
        ax2.plot(theta_closed, r_closed, color=color1)
        ax2.fill(theta_closed, r_closed, alpha=0.3, color=color1)
        ax2.set_xticks(theta)
        ax2.set_xticklabels(months)
        ax2.set_title('Seasonal Distribution (Count)')

        # 3. Species richness radar plot (middle right)
        ax3 = fig.add_subplot(gs[1, 1], projection='polar')
        r_closed = _closed(richness)
        ax3.plot(theta_closed, r_closed, color=color2)
        ax3.fill(theta_closed, r_closed, alpha=0.3, color=color2)
        ax3.set_xticks(theta)
        ax3.set_xticklabels(months)
        ax3.set_title('Seasonal Distribution (Species)')

        # 4. Bubble plot (bottom row, spans both columns)
        ax4 = fig.add_subplot(gs[2, :])
        sizes = richness * 100  # Scale bubble sizes
        scatter = ax4.scatter(months, counts,
                              s=sizes, c=counts,
                              cmap='viridis', alpha=0.6)
        ax4.set_ylabel('Number of Individuals')
        ax4.set_title('Combined View: Size = Species Richness, Color = Count')
        # Attach the colorbar to the bubble panel explicitly instead of
        # relying on pyplot's notion of the current axes.
        fig.colorbar(scatter, ax=ax4, label='Count')

        fig.tight_layout()
        fig.savefig('pretty_plot.png', dpi=300, bbox_inches='tight')
    finally:
        # Always release this specific figure, even if drawing or saving fails.
        plt.close(fig)



In [95]:
# 4. Create Illusion Plot
def create_illusion_plot(monthly_summary):
    """Draw the deliberately misleading figure and save it to ``illusion_plot.png``.

    The layout mirrors ``create_pretty_plot`` (line plot, two radar plots,
    bubble plot), but the plotted values are multiplied by constant factors
    and the colours/backgrounds are chosen for poor readability. Those
    distortions are intentional and are kept exactly as they were.

    Parameters
    ----------
    monthly_summary : pandas.DataFrame
        One row per month with columns ``Month``, ``Count`` and
        ``ScientificName``.

    Returns
    -------
    None
        The figure is written to ``illusion_plot.png`` at 300 dpi and closed.
    """
    months = monthly_summary['Month']
    counts = monthly_summary['Count']
    richness = monthly_summary['ScientificName']

    # Radar geometry. endpoint=False gives every month its own spoke; with the
    # default endpoint=True the first and last month share angle 0 == 2*pi.
    # That collision hides data and is not one of the intended distortions.
    theta = np.linspace(0, 2 * np.pi, len(monthly_summary), endpoint=False)
    theta_closed = np.concatenate([theta, theta[:1] + 2 * np.pi])

    def _closed(values):
        # Repeat the first value at the end so the outline returns to its start.
        values = np.asarray(values)
        return np.concatenate([values, values[:1]])

    # Create misleading figure with similar layout
    fig = plt.figure(figsize=(15, 12))
    try:
        gs = fig.add_gridspec(3, 2, height_ratios=[1, 1, 1])

        # 1. Misleading temporal patterns: counts doubled, yellow on grey
        ax1 = fig.add_subplot(gs[0, :])
        ax1.plot(months, counts * 2,
                 color='yellow', marker='o', linewidth=2.5)
        ax1.fill_between(months, counts * 2,
                         alpha=0.3, color='yellow')
        ax1.set_facecolor('lightgrey')
        ax1.set_ylabel('Count (Misleading Scale)')

        # 2. Misleading radar plot: counts inflated by 1.5
        ax2 = fig.add_subplot(gs[1, 0], projection='polar')
        r_closed = _closed(counts * 1.5)
        ax2.plot(theta_closed, r_closed, color='red')
        ax2.fill(theta_closed, r_closed, alpha=0.3, color='red')
        ax2.set_xticks(theta)
        ax2.set_xticklabels(months)

        # 3. Misleading species radar: richness doubled
        ax3 = fig.add_subplot(gs[1, 1], projection='polar')
        r_closed = _closed(richness * 2)
        ax3.plot(theta_closed, r_closed, color='orange')
        ax3.fill(theta_closed, r_closed, alpha=0.3, color='orange')
        ax3.set_xticks(theta)
        ax3.set_xticklabels(months)

        # 4. Misleading bubble plot: inflated y-values and oversized bubbles
        ax4 = fig.add_subplot(gs[2, :])
        sizes = richness * 200
        ax4.scatter(months, counts * 1.5,
                    s=sizes, c=counts,
                    cmap='Reds', alpha=0.6)
        ax4.set_facecolor('lightgrey')

        fig.tight_layout()
        fig.savefig('illusion_plot.png', dpi=300, bbox_inches='tight')
    finally:
        # Always release this specific figure, even if drawing or saving fails.
        plt.close(fig)



In [96]:
# 5. Create PDF Report
class PDF(FPDF):
    """FPDF document with small helpers for headings and caption text."""

    def __init__(self):
        super().__init__()
        # Let the library start a new page automatically near the bottom edge.
        self.set_auto_page_break(auto=True, margin=15)

    def chapter_title(self, title):
        """Write ``title`` as a bold, left-aligned heading followed by a gap."""
        self.set_font('Arial', 'B', 12)
        self.cell(0, 10, title, 0, 1, 'L')
        self.ln(5)

    def chapter_body(self, body):
        """Write ``body`` as wrapped text, one paragraph at a time.

        Paragraphs are separated by an empty line (two consecutive newline
        characters). Every line is stripped of surrounding whitespace before
        it is written, and lines starting with ``-`` are indented as bullets.
        """
        self.set_font('Arial', '', 11)
        for block in body.split('\n\n'):
            for raw_line in block.strip().split('\n'):
                text = raw_line.strip()
                if text.startswith('-'):
                    # Empty 10-wide cell pushes the bullet text to the right.
                    self.cell(10)
                self.multi_cell(0, 5, text)
            # Vertical gap between paragraphs.
            self.ln(3)

def create_pdf_report(pretty_caption, illusion_caption, code):
    """Assemble ``final_submission.pdf`` from the two saved figures and the code.

    The report has three parts: the pretty plot with its caption, the illusion
    plot with its caption, and a monospaced listing of ``code``.

    Parameters
    ----------
    pretty_caption : str
        Caption text placed under ``pretty_plot.png``.
    illusion_caption : str
        Caption text placed under ``illusion_plot.png``.
    code : str
        Source listing for the last section. Common leading indentation is
        removed, and lines too long for the page are wrapped instead of
        running past the right margin.

    Returns
    -------
    None
        The document is written to ``final_submission.pdf``. Both PNG files
        must already exist in the working directory.
    """
    pdf = PDF()

    # One page per figure: image at the top, caption starting 120 units above
    # the bottom edge (a negative set_y is measured from the bottom).
    figure_pages = (
        ('pretty_plot.png', pretty_caption),
        ('illusion_plot.png', illusion_caption),
    )
    for image_path, caption in figure_pages:
        pdf.add_page()
        pdf.image(image_path, x=10, y=10, w=190)
        pdf.set_y(-120)
        pdf.chapter_body(caption)

    # Code listing
    pdf.add_page()
    pdf.chapter_title('Analysis Code')
    pdf.set_font('Courier', '', 8)

    # cell() never wraps, so work out how many monospaced characters fit
    # between the margins (minus a little slack for the cell padding).
    usable_width = pdf.w - pdf.l_margin - pdf.r_margin - 2
    char_width = pdf.get_string_width('M')
    max_chars = max(1, int(usable_width / char_width)) if char_width > 0 else 100

    # dedent() strips indentation inherited from an indented triple-quoted
    # literal; it is a no-op for code that is already flush left.
    for raw_line in textwrap.dedent(code).split('\n'):
        line = raw_line.rstrip()
        if len(line) <= max_chars:
            pieces = [line]
        else:
            indent = line[:len(line) - len(line.lstrip())]
            # Continuation lines get extra indentation to mark the wrap.
            pieces = textwrap.wrap(
                line, width=max_chars, subsequent_indent=indent + '    '
            ) or [line]
        for piece in pieces:
            pdf.cell(0, 4, piece, ln=True)

    pdf.output('final_submission.pdf')

# 6. Main Execution

def main(data_path='slug-snails.csv'):
    """Run the whole analysis: load the data, draw both figures, build the PDF.

    Parameters
    ----------
    data_path : str or path-like, optional
        CSV file with the observations. Defaults to ``'slug-snails.csv'`` in
        the working directory.

    Side effects
    ------------
    Prints progress messages and, through the helper functions, writes
    ``pretty_plot.png``, ``illusion_plot.png`` and ``final_submission.pdf``.

    Raises
    ------
    Exception
        Any error raised by a step is reported with ``print`` and re-raised.
    """
    try:
        # 1. Load and process data
        print("Loading and cleaning data...")
        df_seasonal = load_and_clean_data(data_path)
        monthly_summary = create_monthly_summary(df_seasonal)
        
        # 2. Create visualizations
        print("Creating pretty plot...")
        create_pretty_plot(monthly_summary)
        print("Creating illusion plot...")
        create_illusion_plot(monthly_summary)
        
        # 3. Define captions
        pretty_caption = """Figure 1: Comprehensive Visualization of Slug and Snail Observations

        1. Top Panel - Dual Axis Plot:
        - Green lines: Individual count patterns over time
        - Blue lines: Species richness variations
        - Clear demonstration of seasonal trends
        
        2. Middle Panels - Radar Plots:
        - Left: Seasonal distribution of total counts
        - Right: Species richness distribution
        - Highlights cyclical nature of observations
        
        3. Bottom Panel - Multi-dimensional Bubble Plot:
        - X-axis: Monthly progression
        - Y-axis: Individual counts
        - Bubble size: Represents species richness
        - Color intensity: Indicates count magnitude
        
        Key Findings: The visualization reveals distinct seasonal patterns with peak activity during fall months and reduced presence during spring, aligning with known slug and snail behavior patterns."""
        
        illusion_caption = """Figure 2: Multi-panel Visualization Demonstrating Common Perceptual Illusions

        INTENTIONAL VISUAL DISTORTIONS:
        
        1. Color Choice Issues:
        - Yellow lines against light grey background creating poor contrast
        - Red coloring implying unnecessary urgency or warning
        - Inconsistent and misleading color schemes across panels
        - Poor visibility of data points against background
        
        2. Scale Manipulations:
        - Top Panel: Count values artificially doubled to exaggerate trends
        - Radar Plots: Proportions inflated (1.5x and 2x) distorting seasonal patterns
        - Bubble Plot: Sizes exaggerated by 200% misleading relative comparisons
        - Inconsistent scaling between related measurements
        
        3. Design Flaws:
        - Distracting grey backgrounds adding unnecessary visual noise
        - Removed gridlines making precise value interpretation difficult
        - Missing axis labels and proper titles
        - Poor legend placement and incomplete information
        
        Impact: These deliberate distortions highlight how poor visualization choices can significantly mislead data interpretation and compromise understanding of temporal and seasonal patterns in slug and snail observations."""
        
        # 4. Complete analysis code documentation
        # This is a condensed listing for the report, not the code that runs.
        code = """
        # Comprehensive Slug and Snail Analysis
        
        # 1. Required Libraries
        import pandas as pd
        import numpy as np
        import matplotlib.pyplot as plt
        import seaborn as sns
        from matplotlib.gridspec import GridSpec
        
        # 2. Data Loading and Cleaning
        df = pd.read_csv('slug-snails.csv')
        df = df.replace('NA', np.nan)
        numeric_columns = df.select_dtypes(include=[np.number]).columns
        for col in numeric_columns:
            df[col] = df[col].fillna(df[col].median())
            
        # 3. Temporal Feature Creation
        df['Date'] = pd.to_datetime(df['Date'])
        df['Month'] = df['Date'].dt.strftime('%b')
        df['Year'] = df['Date'].dt.year
        
        # 4. Seasonal Filtering
        spring_months = ['Mar', 'Apr', 'May']
        fall_months = ['Sep', 'Oct', 'Nov']
        df_seasonal = df[df['Month'].isin(spring_months + fall_months)]
        
        # 5. Monthly Statistics Calculation
        monthly_summary = df_seasonal.groupby('Month').agg({
            'Count': 'sum',
            'ScientificName': 'nunique'
        }).reset_index()
        
        # 6. Month Ordering
        month_order = ['Mar', 'Apr', 'May', 'Sep', 'Oct', 'Nov']
        monthly_summary['Month'] = pd.Categorical(
            monthly_summary['Month'],
            categories=month_order,
            ordered=True
        )
        monthly_summary = monthly_summary.sort_values('Month')
        
        # 7. Visualization Creation
        # Pretty Plot
        fig = plt.figure(figsize=(15, 12))
        gs = fig.add_gridspec(3, 2, height_ratios=[1, 1, 1])
        
        # [Please follow the ipynb notbook for properly formatted code as this code i've given just the gist of it that what I've done.]
        
        # 8. Data Analysis Results
        print("Analysis Summary:")
        print(f"Total Observations: {df_seasonal['Count'].sum()}")
        print(f"Unique Species: {df_seasonal['ScientificName'].nunique()}")
        print(f"Peak Month: {monthly_summary.loc[monthly_summary['Count'].idxmax(), 'Month']}")
        """
        
        # 5. Generate PDF report
        print("Generating PDF report...")
        create_pdf_report(pretty_caption, illusion_caption, code)
        print("Analysis complete! Check 'final_submission.pdf' for results.")
        
    except Exception as e:
        # Report the failure, then re-raise so the traceback is not lost.
        print(f"An error occurred during execution: {e}")
        raise

# Entry point: run the full pipeline when this code is executed directly
# rather than imported as a module.
if __name__ == "__main__":
    main()

Loading and cleaning data...
Creating pretty plot...
Creating illusion plot...
Generating PDF report...
Analysis complete! Check 'final_submission.pdf' for results.
