In [None]:
import pandas as pd
import re
import numpy as np
from scipy.stats import wilcoxon

In [None]:
# Load a results CSV into `df`.
# NOTE: the path is an empty string, so this call fails until a real CSV path is supplied.
df = pd.read_csv('')

In [None]:
import pandas as pd
import numpy as np
import re
from scipy.stats import wilcoxon

# --- Utility Functions ---

def extract_chart_id_and_view_type(filename):
    """Split an image filename into its chart id and view type.

    Args:
        filename: Image name expected to start with
            ``<chart_id>_<regular|relax|aggressive>.png``.

    Returns:
        ``(chart_id, view_type)`` as strings, or ``(None, None)`` when
        *filename* is not a string (e.g. the NaN of a blank CSV cell) or
        does not match the pattern.
    """
    # re.match raises TypeError on non-string input; treat it as "no match".
    if not isinstance(filename, str):
        return None, None
    match = re.match(r'(.+?)_(regular|relax|aggressive)\.png', filename)
    if match:
        return match.group(1), match.group(2)
    return None, None

def extract_option_number(text):
    """Return the first standalone digit 1-7 found in *text* as an int.

    Non-string input, or text without such a digit, yields ``None``.
    """
    if not isinstance(text, str):
        return None
    found = re.search(r'\b([1-7])\b', text)
    return int(found.group(1)) if found else None

def run_wilcoxon(x, y, label):
    """Run a paired Wilcoxon signed-rank test and format the result.

    Args:
        x: First paired sample (NumPy array or pandas Series of numbers).
        y: Second paired sample, paired element-wise with *x*.
        label: Text prefixed to the returned summary.

    Returns:
        A one-line summary with a normal-approximation Z score, the
        p-value returned by ``scipy.stats.wilcoxon`` and the number of
        nonzero paired differences; or a notice when every difference
        is zero.
    """
    diff = y - x
    nonzero_diff = diff[diff != 0]
    n = len(nonzero_diff)
    if n == 0:
        return f"{label} — No nonzero differences. Wilcoxon not applicable."
    w_stat, p = wilcoxon(x, y)
    mean_w = n * (n + 1) / 4
    # Tie correction: each group of t equal |differences| shares an average
    # rank, which lowers the variance of W by (t^3 - t) / 48. Without it the
    # standard deviation is overstated whenever differences repeat.
    _, tie_sizes = np.unique(np.abs(nonzero_diff), return_counts=True)
    tie_term = np.sum(tie_sizes ** 3 - tie_sizes) / 48
    std_w = np.sqrt(n * (n + 1) * (2 * n + 1) / 24 - tie_term)
    z = (w_stat - mean_w) / std_w
    return f"{label} — Wilcoxon Z = {z:.4f}, p = {p:.4e}, n = {n}"

# --- Main Loop ---

file_paths = [
    "",
    "",
    "",
    "",
    "",
    # Add more paths as needed
]

# For every CSV listed above: parse chart id / view type from the `image`
# column and the chosen option from the `response` column, pivot to one row
# per chart, then compare the aggressive view against regular and relax.
for path in file_paths:
    print(f"\n📄 File: {path}")

    try:
        df = pd.read_csv(path)

        df[['chart_id', 'view_type']] = df['image'].apply(lambda x: pd.Series(extract_chart_id_and_view_type(x)))
        df['selected_option'] = df['response'].apply(extract_option_number)
        df = df.dropna(subset=['selected_option']).reset_index(drop=True)

        # Rows whose filename did not parse carry no chart/view; keep them
        # out of the pivot so they are not counted as a chart.
        df = df.dropna(subset=['chart_id', 'view_type'])

        # pivot() raises on repeated (chart_id, view_type) pairs; keep the
        # first response for each pair.
        df = df.drop_duplicates(subset=['chart_id', 'view_type'])

        pivot_df = df.pivot(index='chart_id', columns='view_type', values='selected_option').reset_index()

        total_rows_before = len(pivot_df)
        # Only charts answered under all three views can be paired.
        pivot_df = pivot_df.dropna(subset=['regular', 'relax', 'aggressive']).reset_index(drop=True)
        total_rows_after = len(pivot_df)

        aggr = pivot_df['aggressive']
        regular = pivot_df['regular']
        relax = pivot_df['relax']

        # Print Summary
        print(f"Total rows before cleaning: {total_rows_before}")
        print(f"Total valid rows after cleaning: {total_rows_after}")
        print(f"Discarded rows: {total_rows_before - total_rows_after}")

        # Run tests
        print(run_wilcoxon(aggr, regular, "Aggressive vs Regular"))
        print(run_wilcoxon(aggr, relax, "Aggressive vs Relax"))

    except Exception as e:
        # Report the failure and continue with the next file.
        print(f"❌ Error processing file: {e}")

    print("-------")


In [None]:
import pandas as pd
import numpy as np
import re
from scipy.stats import wilcoxon

# --- Utility Functions ---

def extract_dproj_view_and_id(filename):
    """Split a 2D/3D image path into its chart id and view type.

    Args:
        filename: Path expected to look like
            ``<dir>/<2d|3d>_<text>_<digits>.png``.

    Returns:
        ``(chart_id, view_type)`` where ``chart_id`` is the digit string
        and ``view_type`` is ``"2d"`` or ``"3d"``; ``(None, None)`` when
        *filename* is not a string (e.g. the NaN of a blank CSV cell) or
        does not match the pattern.
    """
    # re.match raises TypeError on non-string input; treat it as "no match".
    if not isinstance(filename, str):
        return None, None
    match = re.match(r'.*/(2d|3d)_.*?_(\d+)\.png', filename)
    if match:
        return match.group(2), match.group(1)  # chart_id, view_type
    return None, None

def extract_option_number(text):
    """Return the first standalone digit 1-7 found in *text* as an int.

    Non-string input, or text without such a digit, yields ``None``.
    """
    if not isinstance(text, str):
        return None
    found = re.search(r'\b([1-7])\b', text)
    return int(found.group(1)) if found else None

def run_wilcoxon(x, y, label):
    """Run a paired Wilcoxon signed-rank test and format the result.

    Args:
        x: First paired sample (NumPy array or pandas Series of numbers).
        y: Second paired sample, paired element-wise with *x*.
        label: Text prefixed to the returned summary.

    Returns:
        A one-line summary with a normal-approximation Z score, the
        p-value returned by ``scipy.stats.wilcoxon`` and the number of
        nonzero paired differences; or a notice when every difference
        is zero.
    """
    diff = y - x
    nonzero_diff = diff[diff != 0]
    n = len(nonzero_diff)
    if n == 0:
        return f"{label} — No nonzero differences. Wilcoxon not applicable."
    w_stat, p = wilcoxon(x, y)
    mean_w = n * (n + 1) / 4
    # Tie correction: each group of t equal |differences| shares an average
    # rank, which lowers the variance of W by (t^3 - t) / 48. Without it the
    # standard deviation is overstated whenever differences repeat.
    _, tie_sizes = np.unique(np.abs(nonzero_diff), return_counts=True)
    tie_term = np.sum(tie_sizes ** 3 - tie_sizes) / 48
    std_w = np.sqrt(n * (n + 1) * (2 * n + 1) / 24 - tie_term)
    z = (w_stat - mean_w) / std_w
    return f"{label} — Wilcoxon Z = {z:.4f}, p = {p:.4e}, n = {n}"

# --- File Paths ---

file_paths = [
    "",
    "",
    "",
    "",
    "",
    # Add more paths as needed
]

# --- Processing Loop ---

# For every CSV listed above: parse chart id / view type from the `image`
# column and the chosen option from the `response` column, pivot to one row
# per chart, then compare the 3d view against the 2d view.
for path in file_paths:
    print(f"\n📄 File: {path}")
    try:
        df = pd.read_csv(path)

        df[['chart_id', 'view_type']] = df['image'].apply(lambda x: pd.Series(extract_dproj_view_and_id(x)))
        df['selected_option'] = df['response'].apply(extract_option_number)
        df = df.dropna(subset=['selected_option']).reset_index(drop=True)

        # Rows whose filename did not parse carry no chart/view; keep them
        # out of the pivot so they are not counted as a chart.
        df = df.dropna(subset=['chart_id', 'view_type'])

        # Drop duplicates to allow pivot
        df = df.drop_duplicates(subset=['chart_id', 'view_type'])

        pivot_df = df.pivot(index='chart_id', columns='view_type', values='selected_option').reset_index()

        total_rows_before = len(pivot_df)
        # Only charts answered under both views can be paired.
        pivot_df = pivot_df.dropna(subset=['2d', '3d']).reset_index(drop=True)
        total_rows_after = len(pivot_df)

        d2 = pivot_df['2d']
        d3 = pivot_df['3d']

        print(f"Total rows before cleaning: {total_rows_before}")
        print(f"Total valid rows after cleaning: {total_rows_after}")
        print(f"Discarded rows: {total_rows_before - total_rows_after}")
        print(run_wilcoxon(d3, d2, "3D vs 2D"))

    except Exception as e:
        # Report the failure and continue with the next file.
        print(f"❌ Error processing file: {e}")

    print("-------")


In [None]:
import pandas as pd
import numpy as np
import re
from scipy.stats import wilcoxon

# --- Utility Functions ---

def extract_dualaxis_view_and_id(filename):
    """Split a dual-axis image filename into its chart id and view type.

    Args:
        filename: Name expected to look like
            ``<text>_<digits>[_]<dual|regular>.png`` (the underscore
            before the view type is optional).

    Returns:
        ``(chart_id, view_type)`` where ``chart_id`` is the digit string;
        ``(None, None)`` when *filename* is not a string (e.g. the NaN of
        a blank CSV cell) or does not match the pattern.
    """
    # re.match raises TypeError on non-string input; treat it as "no match".
    if not isinstance(filename, str):
        return None, None
    match = re.match(r'.*_(\d+)_?(dual|regular)\.png', filename)
    if match:
        return match.group(1), match.group(2)  # chart_id, view_type
    return None, None

def extract_option_number(text):
    """Return the first standalone digit 1-7 found in *text* as an int.

    Non-string input, or text without such a digit, yields ``None``.
    """
    if not isinstance(text, str):
        return None
    found = re.search(r'\b([1-7])\b', text)
    return int(found.group(1)) if found else None

def run_wilcoxon(x, y, label):
    """Run a paired Wilcoxon signed-rank test and format the result.

    Args:
        x: First paired sample (NumPy array or pandas Series of numbers).
        y: Second paired sample, paired element-wise with *x*.
        label: Text prefixed to the returned summary.

    Returns:
        A one-line summary with a normal-approximation Z score, the
        p-value returned by ``scipy.stats.wilcoxon`` and the number of
        nonzero paired differences; or a notice when every difference
        is zero.
    """
    diff = y - x
    nonzero_diff = diff[diff != 0]
    n = len(nonzero_diff)
    if n == 0:
        return f"{label} — No nonzero differences. Wilcoxon not applicable."
    w_stat, p = wilcoxon(x, y)
    mean_w = n * (n + 1) / 4
    # Tie correction: each group of t equal |differences| shares an average
    # rank, which lowers the variance of W by (t^3 - t) / 48. Without it the
    # standard deviation is overstated whenever differences repeat.
    _, tie_sizes = np.unique(np.abs(nonzero_diff), return_counts=True)
    tie_term = np.sum(tie_sizes ** 3 - tie_sizes) / 48
    std_w = np.sqrt(n * (n + 1) * (2 * n + 1) / 24 - tie_term)
    z = (w_stat - mean_w) / std_w
    return f"{label} — Wilcoxon Z = {z:.4f}, p = {p:.4e}, n = {n}"

# --- DualAxis File Paths ---

file_paths = [
    "",
    "",
    "",
    "",
    "",
    # Add more paths as needed
]

# --- Batch Processing ---

# For every CSV listed above: parse chart id / view type from the `image`
# column and the chosen option from the `response` column, pivot to one row
# per chart, then compare the dual view against the regular view.
for path in file_paths:
    print(f"\n📄 File: {path}")
    try:
        df = pd.read_csv(path)

        df[['chart_id', 'view_type']] = df['image'].apply(lambda x: pd.Series(extract_dualaxis_view_and_id(x)))
        df['selected_option'] = df['response'].apply(extract_option_number)
        df = df.dropna(subset=['selected_option']).reset_index(drop=True)

        # Rows whose filename did not parse carry no chart/view; keep them
        # out of the pivot so they are not counted as a chart.
        df = df.dropna(subset=['chart_id', 'view_type'])

        # Drop duplicate chart-view pairs
        df = df.drop_duplicates(subset=['chart_id', 'view_type'])

        # Pivot and clean
        pivot_df = df.pivot(index='chart_id', columns='view_type', values='selected_option').reset_index()
        total_rows_before = len(pivot_df)
        # Only charts answered under both views can be paired.
        pivot_df = pivot_df.dropna(subset=['regular', 'dual']).reset_index(drop=True)
        total_rows_after = len(pivot_df)

        regular = pivot_df['regular']
        dual = pivot_df['dual']

        print(f"Total rows before cleaning: {total_rows_before}")
        print(f"Total valid rows after cleaning: {total_rows_after}")
        print(f"Discarded rows: {total_rows_before - total_rows_after}")
        print(run_wilcoxon(dual, regular, "Dual vs Regular"))

    except Exception as e:
        # Report the failure and continue with the next file.
        print(f"❌ Error processing file: {e}")

    print("-------")


In [None]:
import pandas as pd
import numpy as np
import re
from scipy.stats import wilcoxon

# --- Utility Functions ---

def extract_catenc_view_and_id(filename):
    """Split a categorical-encoding image path into chart id and view type.

    Args:
        filename: Path expected to look like
            ``<dir>/<digits>_<regular|misleading>.png``.

    Returns:
        ``(chart_id, view_type)`` where ``chart_id`` is the digit string;
        ``(None, None)`` when *filename* is not a string (e.g. the NaN of
        a blank CSV cell) or does not match the pattern.
    """
    # re.match raises TypeError on non-string input; treat it as "no match".
    if not isinstance(filename, str):
        return None, None
    match = re.match(r'.*/(\d+)_(regular|misleading)\.png', filename)
    if match:
        return match.group(1), match.group(2)  # chart_id, view_type
    return None, None

def extract_option_number(text):
    """Return the first standalone digit 1-7 found in *text* as an int.

    Non-string input, or text without such a digit, yields ``None``.
    """
    if not isinstance(text, str):
        return None
    found = re.search(r'\b([1-7])\b', text)
    return int(found.group(1)) if found else None

def run_wilcoxon(x, y, label):
    """Run a paired Wilcoxon signed-rank test and format the result.

    Args:
        x: First paired sample (NumPy array or pandas Series of numbers).
        y: Second paired sample, paired element-wise with *x*.
        label: Text prefixed to the returned summary.

    Returns:
        A one-line summary with a normal-approximation Z score, the
        p-value returned by ``scipy.stats.wilcoxon`` and the number of
        nonzero paired differences; or a notice when every difference
        is zero.
    """
    diff = y - x
    nonzero_diff = diff[diff != 0]
    n = len(nonzero_diff)
    if n == 0:
        return f"{label} — No nonzero differences. Wilcoxon not applicable."
    w_stat, p = wilcoxon(x, y)
    mean_w = n * (n + 1) / 4
    # Tie correction: each group of t equal |differences| shares an average
    # rank, which lowers the variance of W by (t^3 - t) / 48. Without it the
    # standard deviation is overstated whenever differences repeat.
    _, tie_sizes = np.unique(np.abs(nonzero_diff), return_counts=True)
    tie_term = np.sum(tie_sizes ** 3 - tie_sizes) / 48
    std_w = np.sqrt(n * (n + 1) * (2 * n + 1) / 24 - tie_term)
    z = (w_stat - mean_w) / std_w
    return f"{label} — Wilcoxon Z = {z:.4f}, p = {p:.4e}, n = {n}"

# --- Categorical Encoding File Paths ---

file_paths = [
    "",
    "",
    "",
    "",
    "",
    # Add more paths as needed
]

# --- Batch Processing ---

# For every CSV listed above: parse chart id / view type from the `image`
# column and the chosen option from the `response` column, pivot to one row
# per chart, then compare the misleading view against the regular view.
for path in file_paths:
    print(f"\n📄 File: {path}")
    try:
        df = pd.read_csv(path)

        df[['chart_id', 'view_type']] = df['image'].apply(lambda x: pd.Series(extract_catenc_view_and_id(x)))
        df['selected_option'] = df['response'].apply(extract_option_number)
        df = df.dropna(subset=['selected_option']).reset_index(drop=True)

        # Rows whose filename did not parse carry no chart/view; keep them
        # out of the pivot so they are not counted as a chart.
        df = df.dropna(subset=['chart_id', 'view_type'])

        # Drop duplicate chart-view pairs
        df = df.drop_duplicates(subset=['chart_id', 'view_type'])

        # Pivot to one row per chart_id
        pivot_df = df.pivot(index='chart_id', columns='view_type', values='selected_option').reset_index()
        total_rows_before = len(pivot_df)
        # Only charts answered under both views can be paired.
        pivot_df = pivot_df.dropna(subset=['regular', 'misleading']).reset_index(drop=True)
        total_rows_after = len(pivot_df)

        reg = pivot_df['regular']
        mis = pivot_df['misleading']

        print(f"Total rows before cleaning: {total_rows_before}")
        print(f"Total valid rows after cleaning: {total_rows_after}")
        print(f"Discarded rows: {total_rows_before - total_rows_after}")
        print(run_wilcoxon(mis, reg, "Misleading vs Regular"))

    except Exception as e:
        # Report the failure and continue with the next file.
        print(f"❌ Error processing file: {e}")

    print("-------")


In [None]:
import pandas as pd
import numpy as np
import re
from scipy.stats import wilcoxon

# --- Utility Functions ---

def extract_view_and_id_continuous(filename):
    """Split a continuous-encoding image path into chart id and view type.

    Args:
        filename: Path expected to look like
            ``<dir>/<digits>_<regular|misleading>.png``.

    Returns:
        ``(chart_id, view_type)`` where ``chart_id`` is the digit string;
        ``(None, None)`` when *filename* is not a string (e.g. the NaN of
        a blank CSV cell) or does not match the pattern.
    """
    # re.match raises TypeError on non-string input; treat it as "no match".
    if not isinstance(filename, str):
        return None, None
    match = re.match(r'.*/(\d+)_(regular|misleading)\.png', filename)
    if match:
        return match.group(1), match.group(2)
    return None, None

def extract_option_number(text):
    """Return the first standalone digit 1-7 found in *text* as an int.

    Non-string input, or text without such a digit, yields ``None``.
    """
    if not isinstance(text, str):
        return None
    found = re.search(r'\b([1-7])\b', text)
    return int(found.group(1)) if found else None

def run_wilcoxon(x, y, label):
    """Run a paired Wilcoxon signed-rank test and format the result.

    Args:
        x: First paired sample (NumPy array or pandas Series of numbers).
        y: Second paired sample, paired element-wise with *x*.
        label: Text prefixed to the returned summary.

    Returns:
        A one-line summary with a normal-approximation Z score, the
        p-value returned by ``scipy.stats.wilcoxon`` and the number of
        nonzero paired differences; or a notice when every difference
        is zero.
    """
    diff = y - x
    nonzero_diff = diff[diff != 0]
    n = len(nonzero_diff)
    if n == 0:
        return f"{label} — No nonzero differences. Wilcoxon not applicable."
    w_stat, p = wilcoxon(x, y)
    mean_w = n * (n + 1) / 4
    # Tie correction: each group of t equal |differences| shares an average
    # rank, which lowers the variance of W by (t^3 - t) / 48. Without it the
    # standard deviation is overstated whenever differences repeat.
    _, tie_sizes = np.unique(np.abs(nonzero_diff), return_counts=True)
    tie_term = np.sum(tie_sizes ** 3 - tie_sizes) / 48
    std_w = np.sqrt(n * (n + 1) * (2 * n + 1) / 24 - tie_term)
    z = (w_stat - mean_w) / std_w
    return f"{label} — Wilcoxon Z = {z:.4f}, p = {p:.4e}, n = {n}"

# --- Continuous Encoding File Paths ---

file_paths = [
    "",
    "",
    "",
    "",
    "",
    # Add more paths as needed
]

# --- Batch Processing Loop ---

# For every CSV listed above: parse chart id / view type from the `image`
# column and the chosen option from the `response` column, pivot to one row
# per chart, then compare the misleading view against the regular view.
for path in file_paths:
    print(f"\n📄 File: {path}")
    try:
        df = pd.read_csv(path)

        df[['chart_id', 'view_type']] = df['image'].apply(lambda x: pd.Series(extract_view_and_id_continuous(x)))
        df['selected_option'] = df['response'].apply(extract_option_number)
        df = df.dropna(subset=['selected_option']).reset_index(drop=True)

        # Rows whose filename did not parse carry no chart/view; keep them
        # out of the pivot so they are not counted as a chart.
        df = df.dropna(subset=['chart_id', 'view_type'])

        # Drop duplicate chart-view pairs
        df = df.drop_duplicates(subset=['chart_id', 'view_type'])

        # Pivot to one row per chart_id
        pivot_df = df.pivot(index='chart_id', columns='view_type', values='selected_option').reset_index()
        total_rows_before = len(pivot_df)
        # Only charts answered under both views can be paired.
        pivot_df = pivot_df.dropna(subset=['regular', 'misleading']).reset_index(drop=True)
        total_rows_after = len(pivot_df)

        reg = pivot_df['regular']
        mis = pivot_df['misleading']

        print(f"Total rows before cleaning: {total_rows_before}")
        print(f"Total valid rows after cleaning: {total_rows_after}")
        print(f"Discarded rows: {total_rows_before - total_rows_after}")
        print(run_wilcoxon(mis, reg, "Misleading vs Regular"))

    except Exception as e:
        # Report the failure and continue with the next file.
        print(f"❌ Error processing file: {e}")

    print("-------")


In [None]:
import pandas as pd
import numpy as np
import re
from scipy.stats import wilcoxon

# --- Utility Functions ---

def extract_invertedaxis_view_and_id(filename):
    """Split an inverted-axis image path into chart id and view type.

    Args:
        filename: Path expected to look like
            ``<dir>/<digits>_<Aggressive|Control>.png`` (case-sensitive).

    Returns:
        ``(chart_id, view_type)`` where ``chart_id`` is the digit string;
        ``(None, None)`` when *filename* is not a string (e.g. the NaN of
        a blank CSV cell) or does not match the pattern.
    """
    # re.match raises TypeError on non-string input; treat it as "no match".
    if not isinstance(filename, str):
        return None, None
    match = re.match(r'.*/(\d+)_(Aggressive|Control)\.png', filename)
    if match:
        return match.group(1), match.group(2)  # chart_id, view_type
    return None, None

def extract_option_number(text):
    """Return the first standalone digit 1-7 found in *text* as an int.

    Non-string input, or text without such a digit, yields ``None``.
    """
    if not isinstance(text, str):
        return None
    found = re.search(r'\b([1-7])\b', text)
    return int(found.group(1)) if found else None

def run_wilcoxon(x, y, label):
    """Run a paired Wilcoxon signed-rank test and format the result.

    Args:
        x: First paired sample (NumPy array or pandas Series of numbers).
        y: Second paired sample, paired element-wise with *x*.
        label: Text prefixed to the returned summary.

    Returns:
        A one-line summary with a normal-approximation Z score, the
        p-value returned by ``scipy.stats.wilcoxon`` and the number of
        nonzero paired differences; or a notice when every difference
        is zero.
    """
    diff = y - x
    nonzero_diff = diff[diff != 0]
    n = len(nonzero_diff)
    if n == 0:
        return f"{label} — No nonzero differences. Wilcoxon not applicable."
    w_stat, p = wilcoxon(x, y)
    mean_w = n * (n + 1) / 4
    # Tie correction: each group of t equal |differences| shares an average
    # rank, which lowers the variance of W by (t^3 - t) / 48. Without it the
    # standard deviation is overstated whenever differences repeat.
    _, tie_sizes = np.unique(np.abs(nonzero_diff), return_counts=True)
    tie_term = np.sum(tie_sizes ** 3 - tie_sizes) / 48
    std_w = np.sqrt(n * (n + 1) * (2 * n + 1) / 24 - tie_term)
    z = (w_stat - mean_w) / std_w
    return f"{label} — Wilcoxon Z = {z:.4f}, p = {p:.4e}, n = {n}"

# --- Inverted Axis File Paths ---

file_paths = [
    "",
    "",
    "",
    "",
    "",
    # Add more paths as needed
]

# --- Batch Processing ---

# For every CSV listed above: parse chart id / view type from the `image`
# column and the chosen option from the `response` column, pivot to one row
# per chart, then compare the Aggressive view against the Control view.
for path in file_paths:
    print(f"\n📄 File: {path}")
    try:
        df = pd.read_csv(path)

        df[['chart_id', 'view_type']] = df['image'].apply(lambda x: pd.Series(extract_invertedaxis_view_and_id(x)))
        df['selected_option'] = df['response'].apply(extract_option_number)
        df = df.dropna(subset=['selected_option']).reset_index(drop=True)

        # Rows whose filename did not parse carry no chart/view; keep them
        # out of the pivot so they are not counted as a chart.
        df = df.dropna(subset=['chart_id', 'view_type'])

        # Drop duplicates before pivot
        df = df.drop_duplicates(subset=['chart_id', 'view_type'])

        # Pivot to compare Aggressive vs Control
        pivot_df = df.pivot(index='chart_id', columns='view_type', values='selected_option').reset_index()
        total_rows_before = len(pivot_df)
        # Only charts answered under both views can be paired.
        pivot_df = pivot_df.dropna(subset=['Aggressive', 'Control']).reset_index(drop=True)
        total_rows_after = len(pivot_df)

        aggr = pivot_df['Aggressive']
        ctrl = pivot_df['Control']

        print(f"Total rows before cleaning: {total_rows_before}")
        print(f"Total valid rows after cleaning: {total_rows_after}")
        print(f"Discarded rows: {total_rows_before - total_rows_after}")
        print(run_wilcoxon(aggr, ctrl, "Aggressive vs Control"))

    except Exception as e:
        # Report the failure and continue with the next file.
        print(f"❌ Error processing file: {e}")

    print("-------")


In [None]:
import pandas as pd
import numpy as np
import re
from scipy.stats import wilcoxon

# --- Utility Functions ---

def extract_quantitydistortion_view_and_id(filename):
    """Split a quantity-distortion image path into chart id and view type.

    Args:
        filename: Path expected to look like
            ``<dir>/<digits>_<original|deceptive>.png``.

    Returns:
        ``(chart_id, view_type)`` where ``chart_id`` is the digit string;
        ``(None, None)`` when *filename* is not a string (e.g. the NaN of
        a blank CSV cell) or does not match the pattern.
    """
    # re.match raises TypeError on non-string input; treat it as "no match".
    if not isinstance(filename, str):
        return None, None
    match = re.match(r'.*/(\d+)_(original|deceptive)\.png', filename)
    if match:
        return match.group(1), match.group(2)
    return None, None

def extract_option_number(text):
    """Return the first standalone digit 1-7 found in *text* as an int.

    Non-string input, or text without such a digit, yields ``None``.
    """
    if not isinstance(text, str):
        return None
    found = re.search(r'\b([1-7])\b', text)
    return int(found.group(1)) if found else None

def run_wilcoxon(x, y, label):
    """Run a paired Wilcoxon signed-rank test and format the result.

    Args:
        x: First paired sample (NumPy array or pandas Series of numbers).
        y: Second paired sample, paired element-wise with *x*.
        label: Text prefixed to the returned summary.

    Returns:
        A one-line summary with a normal-approximation Z score, the
        p-value returned by ``scipy.stats.wilcoxon`` and the number of
        nonzero paired differences; or a notice when every difference
        is zero.
    """
    diff = y - x
    nonzero_diff = diff[diff != 0]
    n = len(nonzero_diff)
    if n == 0:
        return f"{label} — No nonzero differences. Wilcoxon not applicable."
    w_stat, p = wilcoxon(x, y)
    mean_w = n * (n + 1) / 4
    # Tie correction: each group of t equal |differences| shares an average
    # rank, which lowers the variance of W by (t^3 - t) / 48. Without it the
    # standard deviation is overstated whenever differences repeat.
    _, tie_sizes = np.unique(np.abs(nonzero_diff), return_counts=True)
    tie_term = np.sum(tie_sizes ** 3 - tie_sizes) / 48
    std_w = np.sqrt(n * (n + 1) * (2 * n + 1) / 24 - tie_term)
    z = (w_stat - mean_w) / std_w
    return f"{label} — Wilcoxon Z = {z:.4f}, p = {p:.4e}, n = {n}"

# --- Quantity Distortion File Paths ---

file_paths = [
    "",
    "",
    "",
    "",
    "",
    # Add more paths as needed
]

# --- Batch Processing Loop ---

# For every CSV listed above: parse chart id / view type from the `image`
# column and the chosen option from the `response` column, pivot to one row
# per chart, then compare the deceptive view against the original view.
for path in file_paths:
    print(f"\n📄 File: {path}")
    try:
        df = pd.read_csv(path)

        df[['chart_id', 'view_type']] = df['image'].apply(lambda x: pd.Series(extract_quantitydistortion_view_and_id(x)))
        df['selected_option'] = df['response'].apply(extract_option_number)
        df = df.dropna(subset=['selected_option']).reset_index(drop=True)

        # Rows whose filename did not parse carry no chart/view; keep them
        # out of the pivot so they are not counted as a chart.
        df = df.dropna(subset=['chart_id', 'view_type'])

        # Drop duplicate chart-view pairs
        df = df.drop_duplicates(subset=['chart_id', 'view_type'])

        # Pivot for original vs deceptive
        pivot_df = df.pivot(index='chart_id', columns='view_type', values='selected_option').reset_index()
        total_rows_before = len(pivot_df)
        # Only charts answered under both views can be paired.
        pivot_df = pivot_df.dropna(subset=['original', 'deceptive']).reset_index(drop=True)
        total_rows_after = len(pivot_df)

        original = pivot_df['original']
        deceptive = pivot_df['deceptive']

        print(f"Total rows before cleaning: {total_rows_before}")
        print(f"Total valid rows after cleaning: {total_rows_after}")
        print(f"Discarded rows: {total_rows_before - total_rows_after}")
        print(run_wilcoxon(deceptive, original, "Deceptive vs Original"))

    except Exception as e:
        # Report the failure and continue with the next file.
        print(f"❌ Error processing file: {e}")

    print("-------")


In [None]:
import pandas as pd
import numpy as np
import re
from scipy.stats import wilcoxon

# --- Utility Functions ---

def extract_truncatedaxis_view_and_id(filename):
    """Split a truncated-axis image filename into chart id and view type.

    Args:
        filename: Name expected to look like
            ``<text>_<digits>_<Control|Aggressive>.png`` (case-sensitive).

    Returns:
        ``(chart_id, view_type)`` where ``chart_id`` is the digit string;
        ``(None, None)`` when *filename* is not a string (e.g. the NaN of
        a blank CSV cell) or does not match the pattern.
    """
    # re.match raises TypeError on non-string input; treat it as "no match".
    if not isinstance(filename, str):
        return None, None
    match = re.match(r'.*_(\d+)_(Control|Aggressive)\.png', filename)
    if match:
        return match.group(1), match.group(2)  # chart_id, view_type
    return None, None

def extract_option_number(text):
    """Return the first standalone digit 1-7 found in *text* as an int.

    Non-string input, or text without such a digit, yields ``None``.
    """
    if not isinstance(text, str):
        return None
    found = re.search(r'\b([1-7])\b', text)
    return int(found.group(1)) if found else None

def run_wilcoxon(x, y, label):
    """Run a paired Wilcoxon signed-rank test and format the result.

    Args:
        x: First paired sample (NumPy array or pandas Series of numbers).
        y: Second paired sample, paired element-wise with *x*.
        label: Text prefixed to the returned summary.

    Returns:
        A one-line summary with a normal-approximation Z score, the
        p-value returned by ``scipy.stats.wilcoxon`` and the number of
        nonzero paired differences; or a notice when every difference
        is zero.
    """
    diff = y - x
    nonzero_diff = diff[diff != 0]
    n = len(nonzero_diff)
    if n == 0:
        return f"{label} — No nonzero differences. Wilcoxon not applicable."
    w_stat, p = wilcoxon(x, y)
    mean_w = n * (n + 1) / 4
    # Tie correction: each group of t equal |differences| shares an average
    # rank, which lowers the variance of W by (t^3 - t) / 48. Without it the
    # standard deviation is overstated whenever differences repeat.
    _, tie_sizes = np.unique(np.abs(nonzero_diff), return_counts=True)
    tie_term = np.sum(tie_sizes ** 3 - tie_sizes) / 48
    std_w = np.sqrt(n * (n + 1) * (2 * n + 1) / 24 - tie_term)
    z = (w_stat - mean_w) / std_w
    return f"{label} — Wilcoxon Z = {z:.4f}, p = {p:.4e}, n = {n}"

# --- Truncated Axis File Paths ---

file_paths = [
    "",
    "",
    "",
    "",
    "",
    # Add more paths as needed
]

# --- Batch Processing ---

# For every CSV listed above: parse chart id / view type from the `image`
# column and the chosen option from the `response` column, pivot to one row
# per chart, then compare the Aggressive view against the Control view.
for path in file_paths:
    print(f"\n📄 File: {path}")
    try:
        df = pd.read_csv(path)

        df[['chart_id', 'view_type']] = df['image'].apply(lambda x: pd.Series(extract_truncatedaxis_view_and_id(x)))
        df['selected_option'] = df['response'].apply(extract_option_number)
        df = df.dropna(subset=['selected_option']).reset_index(drop=True)

        # Rows whose filename did not parse carry no chart/view; keep them
        # out of the pivot so they are not counted as a chart.
        df = df.dropna(subset=['chart_id', 'view_type'])

        # Drop duplicates before pivot
        df = df.drop_duplicates(subset=['chart_id', 'view_type'])

        # Pivot to compare Aggressive vs Control
        pivot_df = df.pivot(index='chart_id', columns='view_type', values='selected_option').reset_index()
        total_rows_before = len(pivot_df)
        # Only charts answered under both views can be paired.
        pivot_df = pivot_df.dropna(subset=['Aggressive', 'Control']).reset_index(drop=True)
        total_rows_after = len(pivot_df)

        aggr = pivot_df['Aggressive']
        ctrl = pivot_df['Control']

        print(f"Total rows before cleaning: {total_rows_before}")
        print(f"Total valid rows after cleaning: {total_rows_after}")
        print(f"Discarded rows: {total_rows_before - total_rows_after}")
        print(run_wilcoxon(aggr, ctrl, "Aggressive vs Control"))

    except Exception as e:
        # Report the failure and continue with the next file.
        print(f"❌ Error processing file: {e}")

    print("-------")
