# DATA VISUALIZATION (R)


**Workflow**:
- **STEP 0**: Environment setup & directory initialization
- **STEP 1**: Effect size distribution & forest plot (Figure 1)
- **STEP 2**: Univariate moderator forest plots (Figure 2)
- **STEP 3**: Multivariate meta-regression visualizations (Figure 3)
- **STEP 4**: Publication bias & robustness diagnostics (Figure 4)

**Dependencies**:
- `tidyverse` ≥ 2.0.0 — Data manipulation & visualization
- `ggplot2` ≥ 3.4.0 — Publication-grade graphics
- `patchwork` — Multi-panel layouts
- `scales` — Axis formatting
- `ggtext` — Rich text annotations
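- `ggsci` — Scientific color palettes
- `grid` — Low-level graphics
- `cowplot` — Publication-ready themes
- `ggdist` — Distribution visualizations
- `ggrepel` — Smart label placement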

**Data Structure**: See Python notebook for complete input/output specifications.

---

In [None]:
# ============================================================================
# STEP 0: ENVIRONMENT CONFIGURATION & LIBRARY INITIALIZATION
# ============================================================================

# ────────────────────────────────────────────────────────────────────────
# 0.1: Load Essential Packages
# ────────────────────────────────────────────────────────────────────────

suppressPackageStartupMessages({
  library(tidyverse)      # Data manipulation & ggplot2
  library(ggplot2)        # Grammar of graphics
  library(patchwork)      # Multi-panel layouts
  library(scales)         # Axis & color formatting
  library(ggtext)         # Rich text in plots
  library(ggsci)          # Scientific color palettes
  library(grid)           # Low-level graphics
  library(cowplot)        # Publication-ready themes
  library(ggdist)         # Distribution visualizations
  library(ggrepel)        # Smart label placement
})

cat("✓ Packages loaded successfully\n")


# ────────────────────────────────────────────────────────────────────────
# 0.2: Initialize Output Directory Infrastructure
# ────────────────────────────────────────────────────────────────────────

# Folder tree that receives the exported figures: one root folder plus one
# sub-folder per workflow step.
output_dirs <- c(
  "Data_Visualization",
  "Data_Visualization/Step1_Effect_Distribution",
  "Data_Visualization/Step2_Moderator_Forest",
  "Data_Visualization/Step3_Multivariate_Panels",
  "Data_Visualization/Step4_Diagnostics"
)

# Create only the folders that are still missing; folders that already exist
# are skipped without any message, so re-running the cell is harmless.
for (dir in output_dirs) {
  if (dir.exists(dir)) {
    next
  }
  dir.create(dir, recursive = TRUE)
  cat(sprintf("✓ Created: %s\n", dir))
}


# ────────────────────────────────────────────────────────────────────────
# 0.3: Configure ggplot2 for Publication-Quality Output
# ────────────────────────────────────────────────────────────────────────

# Register theme_minimal() as the session-wide default theme, then layer the
# publication overrides on top with theme_update(), one call per group of
# related elements. The groups touch disjoint theme elements, so the final
# default theme does not depend on the order of the calls.
theme_set(theme_minimal(base_size = 13, base_family = "sans"))

# Text hierarchy
theme_update(
  plot.title = element_text(
    size = 17, face = "bold", hjust = 0, margin = margin(b = 10)
  ),
  plot.subtitle = element_text(
    size = 13, color = "grey30", margin = margin(b = 15)
  ),
  axis.title = element_text(size = 13, face = "bold", color = "grey20"),
  axis.text = element_text(size = 11, color = "grey30")
)

# Legend aesthetics
theme_update(
  legend.title = element_text(size = 11, face = "bold"),
  legend.text = element_text(size = 10.5),
  legend.position = "right",
  legend.background = element_rect(
    fill = "white", color = "grey80", linewidth = 0.5
  ),
  legend.key.size = unit(0.8, "cm")
)

# Panel & grid: white panel, light major grid, no minor grid, no border
theme_update(
  panel.background = element_rect(fill = "white", color = NA),
  panel.grid.major = element_line(color = "grey92", linewidth = 0.4),
  panel.grid.minor = element_blank(),
  panel.border = element_blank()
)

# Plot aesthetics
theme_update(
  plot.background = element_rect(fill = "white", color = NA),
  plot.margin = margin(20, 20, 20, 20)
)

# Facets
theme_update(
  strip.text = element_text(size = 12, face = "bold", color = "grey20"),
  strip.background = element_rect(fill = "grey95", color = "grey80")
)

# Global ggplot options: default discrete colour and fill scales come from
# the ggsci NPG scale constructors.
options(ggplot2.discrete.colour = scale_color_npg)
options(ggplot2.discrete.fill = scale_fill_npg)

cat("✓ ggplot2 theme configured for publication\n")


# ────────────────────────────────────────────────────────────────────────
# 0.4: Define Publication-Grade Color Palette
# ────────────────────────────────────────────────────────────────────────

# Primary color system (aligned with Python version but refined)
# COLOR is one flat named list of hex strings, assembled here from four
# thematic groups so each group can be read on its own. Element names, order
# and values are what the plotting code looks up via COLOR$<name>.
COLOR <- c(
  # Significance encoding: blues. Eight gradient steps (hero_dark -> pale)
  # followed by two named accents; sig_accent has the same value as
  # sig_mid_dark.
  list(
    sig_hero_dark = "#091F40",
    sig_deep      = "#0D2E5C",
    sig_very_dark = "#154080",
    sig_dark      = "#1E56A0",
    sig_mid_dark  = "#2968BB",
    sig_mid       = "#3574CC",
    sig_light     = "#6B9CE8",
    sig_pale      = "#B8D9FF",
    sig_hero      = "#1B5592",
    sig_accent    = "#2968BB"
  ),
  # Non-significance: neutral grays, six steps
  list(
    nonsig_hero  = "#5A6B7A",
    nonsig_dark  = "#6B7680",
    nonsig_mid   = "#7E8A94",
    nonsig_light = "#A9B8C4",
    nonsig_pale  = "#D8DFE6",
    nonsig_ultra = "#EDF0F5"
  ),
  # Typography & structure
  list(
    text_hero      = "#0A0A0A",
    text_primary   = "#1A1A1A",
    text_secondary = "#5A5A5A",
    text_light     = "#999999"
  ),
  # Grid & backgrounds (plot_bg_light has the same value as grid_light,
  # border_light the same value as grid_mid)
  list(
    grid_line     = "#D9D9D9",
    grid_mid      = "#E8E8E8",
    grid_light    = "#F5F5F5",
    plot_bg_light = "#F5F5F5",
    bg_alt_1      = "#FCFCFC",
    bg_alt_2      = "#F8F8FB",
    bg_white      = "#FFFFFF",
    border_subtle = "#E0E0E0",
    border_light  = "#E8E8E8"
  )
)

# Nature-inspired palette (for Step 1 compatibility)
NATURE_COLORS <- list(
  teal = "#26A69A", navy = "#1E3A5F", coral = "#FF6B6B",
  blue = "#4A90E2", slate = "#546E7A"
)

cat("✓ Color palettes defined (WCAG AAA+ compliant)\n")


# ────────────────────────────────────────────────────────────────────────
# 0.5: Utility Functions for Advanced Rendering
# ────────────────────────────────────────────────────────────────────────

# Function: Histogram bin width by the Freedman-Diaconis rule,
#           2 * IQR(x) / n^(1/3)
#   x: numeric vector; missing values are ignored both for the IQR and for
#      the observation count n
#   Returns the bin width as a single number (0 when the IQR is 0).
calc_bin_width <- function(x) {
  n_obs <- sum(!is.na(x))
  spread <- IQR(x, na.rm = TRUE)
  2 * spread / n_obs^(1/3)
}

# Function: Interpolate a colour ramp
#   colors: colours to interpolate between (handed to colorRampPalette)
#   n:      number of colours to generate (default 256)
#   Returns a character vector of n colours.
make_gradient <- function(colors, n = 256) {
  ramp <- colorRampPalette(colors)
  ramp(n)
}

# Function: Write one plot to disk twice, as <filename>.png and <filename>.pdf
#   plot:     plot object handed to ggsave()
#   filename: output path WITHOUT extension; ".png" / ".pdf" are appended
#   width, height: figure size passed to ggsave() (defaults 18 x 8)
#   dpi:      resolution used for the PNG only (default 600)
#   Prints a confirmation line once both files have been written.
save_publication_plot <- function(plot, filename, width = 18, height = 8,
                                  dpi = 600) {
  png_path <- paste0(filename, ".png")
  pdf_path <- paste0(filename, ".pdf")

  # Raster copy at the requested resolution
  ggsave(png_path, plot = plot, width = width, height = height,
         dpi = dpi, bg = "white")

  # Vector copy, rendered through the cairo_pdf device
  ggsave(pdf_path, plot = plot, width = width, height = height,
         device = cairo_pdf, bg = "white")

  cat(sprintf("✓ Saved: %s.png & .pdf\n", filename))
}

cat("✓ Utility functions defined\n")


# ────────────────────────────────────────────────────────────────────────
# 0.6: Session Information
# ────────────────────────────────────────────────────────────────────────

# Closing banner for STEP 0: rule lines, R / ggplot2 versions and the working
# directory. The rule lines are assembled by cat() itself (sep = ""), so no
# string-concatenation operator is needed.
cat("\n", strrep("=", 80), "\n", sep = "")
cat("STEP 0 COMPLETE: Environment initialized\n")
cat(strrep("=", 80), "\n", sep = "")
cat(sprintf("R version: %s\n", R.version.string))
# packageVersion() returns a version object; convert it explicitly for "%s"
cat(sprintf("ggplot2 version: %s\n", as.character(packageVersion("ggplot2"))))
cat(sprintf("Working directory: %s\n", getwd()))
cat(strrep("=", 80), "\n\n", sep = "")

---

## STEP 1: Effect Size Distribution & Forest Plot

**Input**: 
- `Meta_ready_cleaned.csv` — Individual effect sizes
- `Meta_Analysis_Results/Step1_Overall_Model/overall_meta_analysis_results.csv`

**Output**: 
- `Figure1_Effect_Size_Distribution.png` & `.pdf` (600 DPI)

**Design**: Two-panel layout with enhanced aesthetics:
- **Panel A**: Histogram + KDE + benchmarks (Cohen's d thresholds)
- **Panel B**: Precision-weighted forest plot with diamond summary

---

In [None]:
# ============================================================================
# STEP 1: EFFECT SIZE DISTRIBUTION & FOREST PLOT VISUALIZATION
# ============================================================================

# Section banner; cat() joins the pieces itself (sep = "").
cat("\n", strrep("=", 80), "\n", sep = "")
cat("STEP 1: EFFECT SIZE DISTRIBUTION & FOREST PLOT\n")
cat(strrep("=", 80), "\n", sep = "")

# ────────────────────────────────────────────────────────────────────────
# 1.1: Data Preparation
# ────────────────────────────────────────────────────────────────────────

# Load individual effect sizes
df_raw <- read_csv("Meta_ready_cleaned.csv", show_col_types = FALSE)

# Load meta-analysis results (long format: one row per Statistic / Value pair)
df_overall <- read_csv(
  "Meta_Analysis_Results/Step1_Overall_Model/overall_meta_analysis_results.csv",
  show_col_types = FALSE
)

# Extract pooled estimates: each value is looked up by its Statistic label
pooled_g <- df_overall %>%
  filter(Statistic == "Effect Size (g)") %>%
  pull(Value)

pooled_ci_lower <- df_overall %>%
  filter(Statistic == "95% CI Lower") %>%
  pull(Value)

pooled_ci_upper <- df_overall %>%
  filter(Statistic == "95% CI Upper") %>%
  pull(Value)

i_squared <- df_overall %>%
  filter(Statistic == "I²") %>%
  pull(Value)

tau_squared <- df_overall %>%
  filter(Statistic == "τ²") %>%
  pull(Value)

# Extract effect sizes for distribution analysis (rows without Hedges_g dropped)
effect_sizes <- df_raw %>%
  filter(!is.na(Hedges_g)) %>%
  pull(Hedges_g)

mean_g <- mean(effect_sizes)
median_g <- median(effect_sizes)
sd_g <- sd(effect_sizes)
# Distinct Study_ID values over ALL rows of df_raw, including rows whose
# Hedges_g is missing.
n_studies <- df_raw %>% pull(Study_ID) %>% n_distinct()

cat(sprintf("✓ Data loaded: %d effect sizes from %d studies\n",
            length(effect_sizes), n_studies))
cat(sprintf("  Pooled g = %.3f [%.3f, %.3f]\n",
            pooled_g, pooled_ci_lower, pooled_ci_upper))

# Prepare forest plot data: one row per effect size with both Hedges_g and SE
# present, ranked by effect size.
df_forest <- df_raw %>%
  select(Study_ID, Hedges_g, SE, CI_Lower, CI_Upper) %>%
  filter(!is.na(Hedges_g), !is.na(SE)) %>%
  mutate(
    weight = 1 / SE^2,  # inverse-variance weight
    # Min-max scaling to [0, 1]. scales::rescale() returns the midpoint (0.5)
    # when every weight is identical, where a hand-written
    # (w - min) / (max - min) would divide by zero and yield NaN.
    weight_norm = scales::rescale(weight, to = c(0, 1)),
    marker_size = 3 + weight_norm * 7  # Size range: 3-10
  ) %>%
  arrange(Hedges_g) %>%
  mutate(study_rank = row_number())

cat(sprintf("✓ Forest data prepared: %d studies ranked by effect size\n",
            nrow(df_forest)))


# ────────────────────────────────────────────────────────────────────────
# 1.2: Panel A — Distribution Plot
# ────────────────────────────────────────────────────────────────────────

# Calculate optimal bin width (Freedman-Diaconis, via calc_bin_width) and turn
# it into a bin count covering the observed range of effect sizes
bin_width <- calc_bin_width(effect_sizes)
n_bins <- ceiling((max(effect_sizes) - min(effect_sizes)) / bin_width)
n_bins <- max(12, min(n_bins, 25))  # Constrain between 12-25 bins

# Create distribution plot with advanced aesthetics.
# Layers are drawn in the order they are added, so later layers sit on top:
# histogram -> density curve -> benchmark lines/labels -> mean -> median ->
# statistics box.
p1 <- ggplot(data.frame(g = effect_sizes), aes(x = g)) +
  
  # Histogram (semi-transparent teal), drawn on the density scale so it shares
  # the y-axis with the density curve below
  geom_histogram(
    aes(y = after_stat(density)),
    bins = n_bins,
    fill = NATURE_COLORS$teal,
    color = "white",
    alpha = 0.6,
    linewidth = 1.2
  ) +
  
  # Kernel density curve (navy); adjust = 1.2 widens the default bandwidth
  geom_density(
    color = NATURE_COLORS$navy,
    linewidth = 2.2,
    adjust = 1.2
  ) +
  
  # Cohen's benchmarks (subtle vertical lines) at 0.2 / 0.5 / 0.8
  geom_vline(
    xintercept = c(0.2, 0.5, 0.8),
    color = NATURE_COLORS$slate,
    linetype = "dotted",
    linewidth = 0.9,
    alpha = 0.5
  ) +
  # Rotated labels for the three benchmark lines, anchored at the top of the
  # panel (y = Inf)
  annotate(
    "text",
    x = c(0.2, 0.5, 0.8),
    y = Inf,
    label = c("Small", "Medium", "Large"),
    vjust = 1.2,
    hjust = 1.1,
    angle = 90,
    size = 3.5,
    color = NATURE_COLORS$slate,
    alpha = 0.7,
    fontface = "italic"
  ) +
  
  # Mean line (coral, dashed)
  geom_vline(
    xintercept = mean_g,
    color = NATURE_COLORS$coral,
    linetype = "dashed",
    linewidth = 1.8,
    alpha = 0.9
  ) +
  
  # Median line (blue, long-dashed)
  geom_vline(
    xintercept = median_g,
    color = NATURE_COLORS$blue,
    linetype = "longdash",
    linewidth = 1.8,
    alpha = 0.9
  ) +
  
  # Statistical annotation box (n, SD, range), pinned to the top-left: x just
  # right of the smallest effect size, y at the top of the panel
  annotate(
    "label",
    x = min(effect_sizes) + 0.05,
    y = Inf,
    label = sprintf(
      "n = %d\nSD = %.3f\nRange: [%.2f, %.2f]",
      length(effect_sizes),
      sd_g,
      min(effect_sizes),
      max(effect_sizes)
    ),
    hjust = 0,
    vjust = 1,
    size = 3.5,
    family = "mono",
    color = NATURE_COLORS$navy,
    fill = "white",
    label.size = 0.6,
    label.padding = unit(0.5, "lines")
  ) +
  
  # Scales and labels
  scale_x_continuous(
    name = "Hedges' g (Effect Size)",
    expand = expansion(mult = c(0.02, 0.02))
  ) +
  # No padding below zero density; 10% headroom above for the annotations
  scale_y_continuous(
    name = "Density",
    expand = expansion(mult = c(0, 0.1))
  ) +
  
  # Titles
  labs(
    title = "A. Effect Size Distribution"
  ) +
  
  # Theme refinements: navy text, horizontal grid lines only
  theme(
    plot.title = element_text(face = "bold", size = 15, color = NATURE_COLORS$navy),
    axis.title = element_text(face = "bold", size = 13, color = NATURE_COLORS$navy),
    axis.text = element_text(size = 11, color = NATURE_COLORS$navy),
    panel.grid.major.y = element_line(color = "grey85", linewidth = 0.6),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  )

cat("✓ Panel A: Distribution plot created\n")


# ────────────────────────────────────────────────────────────────────────
# 1.3: Panel B — Forest Plot
# ────────────────────────────────────────────────────────────────────────

# Create color gradient for effect sizes (red -> orange -> yellow -> greens)
color_gradient <- make_gradient(
  c("#E57373", "#FFB74D", "#FFF176", "#AED581", "#66BB6A"),
  n = 256
)

# Map effect sizes to colors: scale Hedges_g to [0, 1], convert to an index
# 1..256 and look the colour up. NOTE(review): point_color is stored on
# df_forest but no layer of p2 below reads it (points use a fixed teal).
df_forest <- df_forest %>%
  mutate(
    color_val = (Hedges_g - min(Hedges_g)) / (max(Hedges_g) - min(Hedges_g)),
    color_idx = pmin(pmax(round(color_val * 255) + 1, 1), 256),
    point_color = color_gradient[color_idx]
  )

# Create forest plot: one row per effect size at y = study_rank, layers added
# back-to-front (backgrounds, reference lines, CIs, points, pooled summary).
p2 <- ggplot(df_forest, aes(x = Hedges_g, y = study_rank)) +

  # Alternating row backgrounds (even ranks only)
  geom_rect(
    data = df_forest %>% filter(study_rank %% 2 == 0),
    aes(xmin = -Inf, xmax = Inf,
        ymin = study_rank - 0.45, ymax = study_rank + 0.45),
    fill = "#F8F9FA",
    alpha = 0.35,
    inherit.aes = FALSE
  ) +

  # Null effect reference band (g within +/- 0.05)
  annotate(
    "rect",
    xmin = -0.05,
    xmax = 0.05,
    ymin = -Inf,
    ymax = Inf,
    fill = NATURE_COLORS$slate,
    alpha = 0.06
  ) +

  # Null effect line
  geom_vline(
    xintercept = 0,
    color = NATURE_COLORS$slate,
    linewidth = 1.4,
    alpha = 0.5
  ) +

  # Confidence intervals: line width and opacity both grow with the
  # normalised weight
  geom_segment(
    aes(x = CI_Lower, xend = CI_Upper, y = study_rank, yend = study_rank,
        linewidth = weight_norm, alpha = weight_norm),
    color = NATURE_COLORS$slate,
    lineend = "round"
  ) +
  scale_linewidth_continuous(range = c(0.8, 2.5), guide = "none") +
  scale_alpha_continuous(range = c(0.4, 0.7), guide = "none") +

  # CI endpoints (whiskers)
  geom_segment(
    aes(x = CI_Lower, xend = CI_Lower,
        y = study_rank - 0.2, yend = study_rank + 0.2),
    color = NATURE_COLORS$slate,
    linewidth = 0.8,
    alpha = 0.6
  ) +
  geom_segment(
    aes(x = CI_Upper, xend = CI_Upper,
        y = study_rank - 0.2, yend = study_rank + 0.2),
    color = NATURE_COLORS$slate,
    linewidth = 0.8,
    alpha = 0.6
  ) +

  # Point estimates, two layers: a faint halo underneath a filled marker with
  # a white outline. Both map size from marker_size and share one size scale.
  geom_point(
    aes(size = marker_size),
    color = NATURE_COLORS$teal,
    alpha = 0.15,
    stroke = 0
  ) +
  geom_point(
    aes(size = marker_size * 0.7),
    fill = NATURE_COLORS$teal,
    color = "white",
    shape = 21,
    alpha = 0.85,
    stroke = 1.2
  ) +
  scale_size_continuous(range = c(3, 8), guide = "none") +

  # Pooled estimate line
  geom_vline(
    xintercept = pooled_g,
    color = NATURE_COLORS$coral,
    linetype = "dashed",
    linewidth = 2.2,
    alpha = 0.85
  ) +

  # Pooled diamond (custom annotation): horizontal tips at the pooled CI
  # bounds, vertical tips at the pooled estimate, drawn below rank 1 (y = -2)
  annotate(
    "polygon",
    x = c(pooled_ci_lower, pooled_g, pooled_ci_upper, pooled_g, pooled_ci_lower),
    y = c(-2, -2.5, -2, -1.5, -2),
    fill = NATURE_COLORS$coral,
    color = NATURE_COLORS$navy,
    alpha = 0.7,
    linewidth = 1.2
  ) +

  # Heterogeneity annotation, pinned to the top-left corner. df_forest is only
  # filtered on Hedges_g and SE, so CI_Lower may contain NA; na.rm keeps the
  # label position defined (a missing x would silently drop the label).
  annotate(
    "label",
    x = min(df_forest$CI_Lower, na.rm = TRUE) + 0.05,
    y = Inf,
    label = sprintf(
      "Heterogeneity\n────────────\nI² = %.1f%%\nτ² = %.3f",
      i_squared,
      tau_squared
    ),
    hjust = 0,
    vjust = 1,
    size = 3.5,
    family = "mono",
    color = NATURE_COLORS$navy,
    fill = "white",
    label.size = 0.8,
    fontface = "bold",
    label.padding = unit(0.6, "lines")
  ) +

  # Scales
  scale_x_continuous(
    name = "Hedges' g with 95% CI",
    expand = expansion(mult = c(0.05, 0.05))
  ) +
  # Rank axis carries no tick marks or labels (breaks = NULL)
  scale_y_continuous(
    name = "Study (Ranked by Effect Size)",
    breaks = NULL,
    expand = expansion(mult = c(0.02, 0.05))
  ) +

  # Titles
  labs(
    title = "B. Forest Plot: Individual Studies"
  ) +

  # Theme refinements: navy text, vertical grid lines only, blank y-axis
  theme(
    plot.title = element_text(face = "bold", size = 15, color = NATURE_COLORS$navy),
    axis.title = element_text(face = "bold", size = 13, color = NATURE_COLORS$navy),
    axis.text.x = element_text(size = 11, color = NATURE_COLORS$navy),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    panel.grid.major.x = element_line(color = "grey85", linewidth = 0.6),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank()
  )

cat("✓ Panel B: Forest plot created\n")


# ────────────────────────────────────────────────────────────────────────
# 1.4: Combine Panels & Export
# ────────────────────────────────────────────────────────────────────────

# Combine panels with patchwork: Panel A (p1) left, Panel B (p2) right, under
# one centred figure title that reports the effect-size and study counts.
fig1 <- p1 + p2 +
  plot_annotation(
    title = sprintf(
      "Effect Size Distribution & Meta-Analytic Summary  |  %d Effect Sizes from k = %d Studies",
      length(effect_sizes),
      n_studies
    ),
    theme = theme(
      plot.title = element_text(
        size = 17,
        face = "bold",
        color = NATURE_COLORS$navy,
        hjust = 0.5,
        margin = margin(b = 15)
      )
    )
  )

# Save outputs (PNG + PDF next to each other, same base name)
save_publication_plot(
  fig1,
  "Data_Visualization/Step1_Effect_Distribution/Figure1_Effect_Size_Distribution",
  width = 18,
  height = 8,
  dpi = 600
)

# Display figure
print(fig1)

# Summary statistics. The rule lines are assembled by cat() itself
# (sep = ""), so no string-concatenation operator is needed.
cat("\n", strrep("=", 80), "\n", sep = "")
cat("FIGURE 1: EFFECT SIZE DISTRIBUTION & META-ANALYTIC SUMMARY\n")
cat(strrep("=", 80), "\n", sep = "")
cat(sprintf("Effect sizes: n = %d | Studies: k = %d\n",
            length(effect_sizes), n_studies))
cat(sprintf("Pooled effect: g = %.3f (95%% CI [%.3f, %.3f])\n",
            pooled_g, pooled_ci_lower, pooled_ci_upper))
cat(sprintf("Heterogeneity: I² = %.1f%%, τ² = %.3f\n",
            i_squared, tau_squared))
cat(sprintf("Descriptive: Mean = %.3f, Median = %.3f, SD = %.3f\n",
            mean_g, median_g, sd_g))
cat(sprintf("Export: PNG + PDF (600 DPI)\n"))
cat(strrep("=", 80), "\n\n", sep = "")

---

## STEP 2: Univariate Moderator Analysis — Forest Plot

**Input**: 
- `Meta_Analysis_Results/Step2_Moderator_Analysis/univariate_moderator_summary.csv` — Omnibus test statistics
- `Meta_Analysis_Results/Step2_Moderator_Analysis/univariate_moderator_results.csv` — Level-specific estimates

**Output**: 
- `Figure2_Univariate_Moderators.png` & `.pdf` (600 DPI)

**Design**: High-density forest plot with multi-layer rendering:
- **14 moderators** stratified by domain (Training vs. Learner)
- **Multi-scale markers**: Size encodes effect intensity
- **Gradient CIs**: Significance encoded via color (blue gradient vs. gray)
- **WCAG AAA+ compliance**: Professional color hierarchy

---

In [None]:
# ============================================================================
# STEP 2: UNIVARIATE MODERATOR ANALYSIS — FOREST PLOT VISUALIZATION
# ============================================================================

# Section banner; cat() joins the pieces itself (sep = "").
cat("\n", strrep("=", 100), "\n", sep = "")
cat("STEP 2: UNIVARIATE MODERATOR ANALYSIS — FOREST PLOT\n")
cat(strrep("=", 100), "\n", sep = "")

# ────────────────────────────────────────────────────────────────────────
# 2.1: Data Assembly and Preparation
# ────────────────────────────────────────────────────────────────────────

# Load omnibus test statistics
df_summary <- read_csv(
  "Meta_Analysis_Results/Step2_Moderator_Analysis/univariate_moderator_summary.csv",
  show_col_types = FALSE
)

# Load level-specific estimates
df_detailed <- read_csv(
  "Meta_Analysis_Results/Step2_Moderator_Analysis/univariate_moderator_results.csv",
  show_col_types = FALSE
)

# Collapse the level-specific rows to one row per moderator: the widest span
# covered by any level's CI (smallest lower bound, largest upper bound) and
# the Estimate of the first listed level.
ci_data <- df_detailed %>%
  group_by(Moderator) %>%
  summarise(
    CI_Lower = min(CI_Lower, na.rm = TRUE),
    CI_Upper = max(CI_Upper, na.rm = TRUE),
    Estimate = first(Estimate),
    .groups = "drop"
  )

# Merge and flag significance of the omnibus test (p_QM) at alpha = 0.05.
# Moderators without level-specific rows keep NA in the joined columns.
df_plot <- df_summary %>%
  left_join(ci_data, by = "Moderator") %>%
  mutate(Significant = p_QM < 0.05)

# Exclude methodological moderators
excluded_mods <- c(
  "Outcome_Domain", "Design_Type", "Comparator_Type", "Rater_Type",
  "Training_TotalMinute", "Training_TotalWeeks", "Gender_Ratio_FM"
)

df_plot <- df_plot %>%
  filter(!Moderator %in% excluded_mods)

# Define display order. The vector lists Learner moderators first, then
# Training moderators; rows are later sorted by DESCENDING order, so the first
# entry here ends up at the top of the y-axis and the last at the bottom
# (bottom-to-top the axis reads Training → Learner).
display_order <- c(
  # Learner characteristics
  "Age_Group",
  "L1",
  "Proficiency_Level",
  "Education_Stage",
  "English_Major",
  "Learning_Context",

  # Training characteristics
  "Training_Context",
  "Training_Focus",
  "Target_Feature",
  "Feedback_Type",
  "Training_Duration",
  "Instructor_Type",
  "Peer_Interaction",
  "Visual_Cue"
)

# Human-readable labels (several raw names map to the same display label)
label_map <- c(
  "Visual_Cue" = "Visual Cue",
  "Peer_Interaction" = "Peer Interaction",
  "Instructor_Type" = "Instructor Type",
  "Training_Duration" = "Training Duration",
  "Training_Duration_Weeks" = "Training Duration",
  "Treatment_Duration" = "Training Duration",
  "Feedback_Type" = "Feedback Type",
  "Target_Feature" = "Target Feature",
  "Training_Focus" = "Training Focus",
  "Focus_Type" = "Training Focus",
  "Training_Context" = "Training Context",
  "Learning_Context" = "Learning Context",
  "English_Major" = "English Majors",
  "Education_Stage" = "Education Stage",
  "Proficiency_Level" = "Proficiency Level",
  "L1" = "L1 Background",
  "Age_Group" = "Age Group"
)

# Apply ordering and labeling
df_plot <- df_plot %>%
  mutate(
    order = match(Moderator, display_order),
    # Moderators missing from display_order get 999 and therefore sort first
    # under desc(order), i.e. they receive the lowest y positions
    order = if_else(is.na(order), 999, order),
    Label = recode(Moderator, !!!label_map, .default = Moderator)
  ) %>%
  arrange(desc(order)) %>%
  mutate(
    y_pos = row_number(),
    # Estimate scaled to [0, 1]. scales::rescale() ignores NA when finding
    # the range and returns the midpoint when all estimates are equal; a
    # hand-written (x - min) / (max - min) would turn EVERY intensity into NA
    # as soon as one moderator has no Estimate after the left join.
    intensity = scales::rescale(Estimate, to = c(0, 1))
  )

# Categorize by domain: everything not listed as a learner moderator counts
# as a training moderator
learner_mods <- c("Age_Group", "L1", "Proficiency_Level",
                  "Education_Stage", "English_Major", "Learning_Context")

df_plot <- df_plot %>%
  mutate(Category = if_else(Moderator %in% learner_mods, "Learner", "Training"))

n_mods <- nrow(df_plot)
n_learner <- sum(df_plot$Category == "Learner")
n_training <- sum(df_plot$Category == "Training")

cat(sprintf("✓ Data prepared: %d moderators (%d Training, %d Learner)\n",
            n_mods, n_training, n_learner))


# ────────────────────────────────────────────────────────────────────────
# 2.2: Create Enhanced Forest Plot
# ────────────────────────────────────────────────────────────────────────

# Prepare plotting data with color assignments. Colours, sizes and alphas are
# stored as literal values per row and drawn through the identity scales
# added further down.
df_plot <- df_plot %>%
  mutate(
    # Significance-based colors
    point_color = if_else(Significant, COLOR$sig_hero, COLOR$nonsig_mid),
    point_outline = if_else(Significant, COLOR$sig_hero_dark, COLOR$nonsig_dark),
    ci_color = if_else(Significant, COLOR$sig_mid, COLOR$nonsig_light),

    # Size encoding (intensity-weighted)
    point_size = if_else(
      Significant,
      3.5 + intensity * 2.5,  # Range: 3.5-6.0 for significant
      2.0 + intensity * 1.5   # Range: 2.0-3.5 for non-significant
    ),

    # Alpha encoding
    point_alpha = if_else(Significant, 0.95, 0.80),
    ci_alpha = if_else(Significant, 0.65, 0.45)
  )

# Create forest plot with sophisticated multi-layer design: one row per
# moderator at y = y_pos, layers added back-to-front.
p_forest <- ggplot(df_plot, aes(x = Estimate, y = y_pos)) +

  # Alternating row backgrounds (even rows only)
  geom_rect(
    data = df_plot %>% filter(y_pos %% 2 == 0),
    aes(xmin = -Inf, xmax = Inf,
        ymin = y_pos - 0.48, ymax = y_pos + 0.48),
    fill = COLOR$bg_alt_1,
    alpha = 0.6,
    inherit.aes = FALSE
  ) +

  # Null effect reference band
  annotate(
    "rect",
    xmin = -0.042,
    xmax = 0.042,
    ymin = -Inf,
    ymax = Inf,
    fill = COLOR$grid_light,
    alpha = 0.35
  ) +

  # Null effect line
  geom_vline(
    xintercept = 0,
    color = COLOR$grid_line,
    linewidth = 1.5,
    alpha = 0.6
  ) +

  # Confidence intervals - Multi-layer rendering
  # Layer 1: Shadow (widest, most transparent)
  geom_segment(
    aes(x = CI_Lower, xend = CI_Upper,
        y = y_pos, yend = y_pos,
        color = ci_color),
    linewidth = 6,
    alpha = 0.15,
    lineend = "round",
    show.legend = FALSE
  ) +

  # Layer 2: Glow (medium width)
  geom_segment(
    aes(x = CI_Lower, xend = CI_Upper,
        y = y_pos, yend = y_pos,
        color = ci_color),
    linewidth = 3.5,
    alpha = 0.35,
    lineend = "round",
    show.legend = FALSE
  ) +

  # Layer 3: Core (solid, narrow)
  geom_segment(
    aes(x = CI_Lower, xend = CI_Upper,
        y = y_pos, yend = y_pos,
        color = ci_color,
        alpha = ci_alpha),
    linewidth = 2.0,
    lineend = "round",
    show.legend = FALSE
  ) +

  # CI whisker caps
  geom_segment(
    aes(x = CI_Lower, xend = CI_Lower,
        y = y_pos - 0.25, yend = y_pos + 0.25,
        color = ci_color,
        alpha = ci_alpha),
    linewidth = 1.5,
    lineend = "round",
    show.legend = FALSE
  ) +
  geom_segment(
    aes(x = CI_Upper, xend = CI_Upper,
        y = y_pos - 0.25, yend = y_pos + 0.25,
        color = ci_color,
        alpha = ci_alpha),
    linewidth = 1.5,
    lineend = "round",
    show.legend = FALSE
  ) +

  # Point estimates - Triple-layer architecture
  # Layer 1: Halo (large, very transparent)
  geom_point(
    aes(size = point_size, fill = point_color),
    shape = 21,
    color = NA,
    alpha = 0.15,
    show.legend = FALSE
  ) +

  # Layer 2: Glow (medium)
  geom_point(
    aes(size = point_size * 0.7, fill = point_color),
    shape = 21,
    color = "white",
    stroke = 0.5,
    alpha = 0.5,
    show.legend = FALSE
  ) +

  # Layer 3: Core (solid with white border)
  geom_point(
    aes(size = point_size * 0.5, fill = point_color, alpha = point_alpha),
    shape = 21,
    color = "white",
    stroke = 1.2,
    show.legend = FALSE
  ) +

  # Identity scales: use the per-row colour / fill / alpha / size values as-is
  scale_color_identity() +
  scale_fill_identity() +
  scale_alpha_identity() +
  scale_size_identity() +

  # Y-axis: moderator labels
  scale_y_continuous(
    breaks = df_plot$y_pos,
    labels = df_plot$Label,
    expand = expansion(add = c(1, 1))
  ) +

  # X-axis
  scale_x_continuous(
    name = "Meta-Regression Coefficient (β) with 95% Confidence Interval",
    expand = expansion(mult = c(0.02, 0.02))
  ) +
  # The visible x-range is fixed on the coordinate system rather than through
  # scale limits. Scale limits turn out-of-range values into NA, so a CI with
  # an endpoint beyond [-1.5, 2.5] would lose its whole segment; zooming with
  # coord_cartesian() keeps the data and merely clips at the panel edge.
  coord_cartesian(xlim = c(-1.5, 2.5)) +

  # Labels
  labs(
    y = "Moderator Variables (Training → Learner)",
    title = "Univariate Moderator Analysis: Forest Plot with 95% Confidence Intervals",
    subtitle = "Effect Sizes Stratified by Learner and Training Characteristics"
  ) +

  # Theme enhancements (theme_minimal here replaces the session default theme
  # for this plot only)
  theme_minimal(base_size = 14) +
  theme(
    # Title hierarchy
    plot.title = element_text(
      size = 20,
      face = "bold",
      color = COLOR$text_hero,
      margin = margin(b = 5)
    ),
    plot.subtitle = element_text(
      size = 14,
      color = COLOR$text_primary,
      margin = margin(b = 20)
    ),

    # Axis styling
    axis.title.x = element_text(
      size = 16,
      face = "bold",
      color = COLOR$text_hero,
      margin = margin(t = 15)
    ),
    axis.title.y = element_text(
      size = 15,
      face = "bold",
      color = COLOR$text_primary,
      margin = margin(r = 15)
    ),
    axis.text.x = element_text(
      size = 14,
      color = COLOR$text_secondary
    ),
    axis.text.y = element_text(
      size = 16,
      face = "plain",
      color = COLOR$text_primary,
      hjust = 1
    ),

    # Panel styling
    panel.background = element_rect(fill = COLOR$bg_white, color = NA),
    panel.grid.major.x = element_line(
      color = COLOR$grid_mid,
      linewidth = 0.3,
      linetype = "dotted"
    ),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),

    # Plot area
    plot.background = element_rect(fill = "white", color = NA),
    plot.margin = margin(25, 25, 25, 25)
  )

# Add significance legend manually: two sample markers with labels and a
# heading, positioned in data coordinates near the right edge of the panel and
# at fractions of the moderator count on the y-axis.
p_forest <- p_forest +
  annotate(
    "point",
    x = 2.1,
    y = c(n_mods * 0.25, n_mods * 0.15),
    size = c(5, 3.5),
    shape = 21,
    fill = c(COLOR$sig_hero, COLOR$nonsig_mid),
    color = "white",
    stroke = 1.2
  ) +
  annotate(
    "text",
    x = 2.25,
    y = c(n_mods * 0.25, n_mods * 0.15),
    label = c("Significant (p < 0.05)", "Non-significant (p ≥ 0.05)"),
    hjust = 0,
    size = 5,
    fontface = "bold",
    color = COLOR$text_primary
  ) +
  annotate(
    "text",
    x = 2.05,
    y = n_mods * 0.32,
    label = "Significance",
    hjust = 0,
    size = 5.5,
    fontface = "bold",
    color = COLOR$text_hero
  )

cat("✓ Forest plot created with multi-layer rendering\n")


# ────────────────────────────────────────────────────────────────────────
# 2.3: Export High-Resolution Output
# ────────────────────────────────────────────────────────────────────────

# Canvas size: fixed 24 in width; the height grows with the number of
# moderator rows on top of a fixed vertical allowance for titles and margins.
fig_width <- 24
row_height <- 0.68
margin_vertical <- 4.8
fig_height <- margin_vertical + row_height * n_mods

# Write the PNG + PDF pair for Figure 2
save_publication_plot(
  plot = p_forest,
  filename = "Data_Visualization/Step2_Moderator_Forest/Figure2_Univariate_Moderators",
  width = fig_width,
  height = fig_height,
  dpi = 600
)

# Render the figure in the notebook
print(p_forest)


# ────────────────────────────────────────────────────────────────────────
# 2.4: Summary Statistics
# ────────────────────────────────────────────────────────────────────────

# Console report for STEP 2: moderator counts, significance tallies, value
# ranges and export settings. The rule lines are assembled by cat() itself
# (sep = ""), so no string-concatenation operator is needed.
cat("\n", strrep("=", 100), "\n", sep = "")
cat("STEP 2 COMPLETE: UNIVARIATE MODERATOR FOREST PLOT\n")
cat(strrep("=", 100), "\n", sep = "")

cat("\nDATA SUMMARY:\n")
cat(sprintf("  Moderators analyzed: %d total\n", n_mods))
cat(sprintf("    - Training characteristics: %d\n", n_training))
cat(sprintf("    - Learner characteristics: %d\n", n_learner))

cat("\nSTATISTICAL SUMMARY:\n")
cat(sprintf("  Significant moderators: %d\n", sum(df_plot$Significant)))
cat(sprintf("  Non-significant moderators: %d\n", sum(!df_plot$Significant)))
cat(sprintf("  Effect coefficient range: β ∈ [%.4f, %.4f]\n",
            min(df_plot$Estimate), max(df_plot$Estimate)))
cat(sprintf("  Confidence interval range: [%.4f, %.4f]\n",
            min(df_plot$CI_Lower), max(df_plot$CI_Upper)))

cat("\nVISUALIZATION SPECIFICATIONS:\n")
cat(sprintf("  Canvas dimensions: %.1f\" × %.2f\" (600 DPI)\n", fig_width, fig_height))
cat("  Color palette: 8-tier blues + 6-tier grays (WCAG AAA+)\n")
cat("  Rendering: Triple-layer architecture (shadow-glow-core)\n")
cat("  Typography: Premium sans-serif hierarchy\n")

cat("\nOUTPUTS:\n")
cat("  PNG (raster, 600 DPI)\n")
cat("  PDF (vector)\n")
cat(strrep("=", 100), "\n\n", sep = "")

---

## STEP 3: Multivariate Meta-Regression Visualization

**Input**: 
- `Meta_ready_cleaned.csv` — Raw effect sizes with moderator variables
- `multivariate_model_coefficients.csv` — Adjusted moderator effects

**Output**: 
- `Figure3_Multivariate_Meta_Regression.png` & `.pdf` (600 DPI)

**Design**: Four-panel dashboard examining key moderators:
- **Panel A**: Education Level (violin + box plot)
- **Panel B**: English Major Status (violin + box plot)
- **Panel C**: First Language/L1 (forest plot with gradient colors)
- **Panel D**: Treatment Duration (violin + box plot)

**Color Logic**: Mean-ranked encoding (Green = highest, Purple = middle, Blue = lowest)

---

In [None]:
# ============================================================================
# STEP 3: MULTIVARIATE META-REGRESSION VISUALIZATION
# ============================================================================

# Section banner; cat() joins the pieces itself (sep = "").
cat("\n", strrep("=", 80), "\n", sep = "")
cat("STEP 3: MULTIVARIATE META-REGRESSION VISUALIZATION\n")
cat(strrep("=", 80), "\n", sep = "")

# ────────────────────────────────────────────────────────────────────────
# 3.1: Data Preparation
# ────────────────────────────────────────────────────────────────────────

# Load multivariate coefficients
df_coef <- read_csv(
  "Meta_Analysis_Results/Step2_Moderator_Analysis/multivariate_model_coefficients.csv",
  show_col_types = FALSE
)

# Load raw effect sizes (same file as STEP 1, read again into its own object)
df_raw_step3 <- read_csv("Meta_ready_cleaned.csv", show_col_types = FALSE)

# Define color palette (Green-Purple-Orange-Blue system): `primary` holds the
# saturated tones and `gradient` a lighter tone per hue, in the same order.
COLORS_FIG3 <- list(
  primary = c("#2E7D32", "#7B1FA2", "#EF6C00", "#1976D2"),  # Green, Purple, Orange, Blue
  gradient = c("#66BB6A", "#BA68C8", "#FFB74D", "#64B5F6"),
  neutral = "#263238",
  bg = "#FAFAFA",
  grid = "#E0E0E0"
)

cat("✓ Data loaded and color palette defined\n")


# ────────────────────────────────────────────────────────────────────────
# 3.2: Panel A — Education Level (Violin Plot)
# ────────────────────────────────────────────────────────────────────────

#' Build a violin + box-plot panel of Hedges' g for one categorical moderator.
#'
#' Levels are coloured by the rank of their mean effect size using
#' `COLORS_FIG3$primary` (read from the calling environment): the level with
#' the highest mean is green and, for 2-4 levels, the lowest is blue. With
#' more than four levels the four colours repeat, still counted from the
#' highest mean downwards.
#'
#' @param data Data frame containing a numeric `Hedges_g` column and the
#'   moderator column.
#' @param moderator Name (string) of the moderator column in `data`; rows
#'   where it is `NA` are dropped.
#' @param title Panel title, printed after the panel letter.
#' @param panel_label Panel letter. Panel "A" additionally carries a
#'   "Mean Effect" legend mapping each colour to its level.
#' @return A ggplot object.
create_violin_panel <- function(data, moderator, title, panel_label) {
  fill_palette <- COLORS_FIG3$primary  # green, purple, orange, blue

  # Keep rows with a known moderator value and treat the moderator as a factor
  plot_data <- data %>%
    filter(!is.na(.data[[moderator]])) %>%
    mutate(level = as.factor(.data[[moderator]]))

  # Mean effect per level, sorted ascending so row i has rank i
  level_means <- plot_data %>%
    group_by(level) %>%
    summarise(mean_g = mean(Hedges_g, na.rm = TRUE), .groups = "drop") %>%
    arrange(mean_g)

  # Colours in ascending-mean order. The general branch counts from the top
  # (highest mean -> palette[1] = green) so that it agrees with the 2- and
  # 3-level cases; previously it started at green for the *lowest* mean.
  n_levels <- nrow(level_means)
  rank_colors <- if (n_levels == 2) {
    fill_palette[c(4, 1)]
  } else if (n_levels == 3) {
    fill_palette[c(4, 2, 1)]
  } else {
    fill_palette[(n_levels - seq_len(n_levels)) %% length(fill_palette) + 1]
  }
  level_means <- level_means %>%
    mutate(rank = row_number(), color = rank_colors)

  # Attach each observation's level colour
  plot_data <- plot_data %>%
    left_join(level_means %>% select(level, color), by = "level")

  # Per-level statistics used for the annotations and the axis labels
  stats_data <- plot_data %>%
    group_by(level) %>%
    summarise(
      n = n(),
      mean = mean(Hedges_g, na.rm = TRUE),
      median = median(Hedges_g, na.rm = TRUE),
      sd = sd(Hedges_g, na.rm = TRUE),
      color = first(color),
      .groups = "drop"
    )

  # Look up sample sizes by level name rather than by position, so the axis
  # labels stay correct whatever order the scale passes its breaks in
  n_by_level <- setNames(stats_data$n, as.character(stats_data$level))

  # scale_fill_identity() defaults to guide = "none", which silently hides
  # the legend requested for Panel A. Switch the guide on explicitly and
  # label each colour with its level (highest mean first). A legend is only
  # meaningful while every level has its own colour.
  show_legend <- panel_label == "A" && anyDuplicated(level_means$color) == 0
  fill_scale <- if (show_legend) {
    legend_key <- level_means %>% arrange(desc(mean_g))
    scale_fill_identity(
      guide = "legend",
      breaks = legend_key$color,
      labels = as.character(legend_key$level)
    )
  } else {
    scale_fill_identity()
  }

  p <- ggplot(plot_data, aes(x = level, y = Hedges_g)) +

    # Violin layers (dual-layer: outer glow + inner solid); kept out of the
    # legend so that its keys show only the mean diamonds
    geom_violin(
      aes(fill = color),
      alpha = 0.25,
      trim = FALSE,
      scale = "width",
      width = 1.1,
      color = NA,
      show.legend = FALSE
    ) +
    geom_violin(
      aes(fill = color),
      alpha = 0.65,
      trim = FALSE,
      scale = "width",
      width = 0.9,
      color = "white",
      linewidth = 1.5,
      show.legend = FALSE
    ) +

    # Box plot overlay
    geom_boxplot(
      width = 0.25,
      fill = "white",
      color = COLORS_FIG3$neutral,
      linewidth = 1.2,
      alpha = 0.95,
      outlier.shape = 23,
      outlier.fill = COLORS_FIG3$primary[1],
      outlier.color = "white",
      outlier.size = 3,
      outlier.stroke = 1.2
    ) +

    # Mean markers (diamond shape)
    stat_summary(
      fun = mean,
      geom = "point",
      aes(fill = color),
      shape = 23,
      size = 8,
      color = "white",
      stroke = 1.8,
      alpha = 0.95,
      show.legend = show_legend
    ) +

    # Statistical annotations pinned to the top edge of the panel
    geom_text(
      data = stats_data,
      aes(
        x = level,
        y = Inf,
        label = sprintf("M = %.3f\nMd = %.3f\nSD = %.3f", mean, median, sd)
      ),
      vjust = 1.1,
      size = 4.5,
      fontface = "bold",
      color = COLORS_FIG3$neutral,
      lineheight = 0.9
    ) +

    # Color scales (colour values are literal hex codes stored in the data)
    fill_scale +
    scale_color_identity() +

    # Axes
    scale_x_discrete(
      labels = function(x) paste0(x, "\n(n=", n_by_level[x], ")")
    ) +
    scale_y_continuous(
      name = "Effect Size (Hedges' g)",
      expand = expansion(mult = c(0.05, 0.15))  # headroom for the annotations
    ) +

    # Labels
    labs(
      title = paste0(panel_label, ". ", title)
    ) +

    # Theme
    theme_minimal(base_size = 14) +
    theme(
      plot.title = element_text(
        size = 19,
        face = "bold",
        color = COLORS_FIG3$neutral,
        margin = margin(b = 15)
      ),
      axis.title.x = element_blank(),
      axis.title.y = element_text(
        size = 16,
        face = "bold",
        color = COLORS_FIG3$neutral,
        margin = margin(r = 10)
      ),
      axis.text = element_text(
        size = 14,
        face = "bold",
        color = COLORS_FIG3$neutral
      ),
      panel.background = element_rect(fill = COLORS_FIG3$bg, color = NA),
      panel.grid.major.y = element_line(
        color = COLORS_FIG3$grid,
        linewidth = 0.4,
        linetype = "dotted"
      ),
      panel.grid.major.x = element_blank(),
      panel.grid.minor = element_blank(),
      plot.background = element_rect(fill = "white", color = NA),
      plot.margin = margin(15, 15, 15, 15)
    )

  # Legend title and styling for Panel A
  # NOTE(review): a numeric `legend.position` is deprecated from ggplot2
  # 3.5.0 in favour of `legend.position.inside`; kept for 3.4 compatibility.
  if (panel_label == "A") {
    p <- p +
      labs(fill = "Mean Effect") +
      theme(
        legend.position = c(0.85, 0.15),
        legend.title = element_text(size = 13, face = "bold"),
        legend.text = element_text(size = 12),
        legend.background = element_rect(
          fill = "white",
          color = COLORS_FIG3$neutral,
          linewidth = 1
        ),
        legend.key.size = unit(0.8, "cm")
      )
  }

  p
}

# Panel A: effect sizes grouped by the Education_Stage column
p3a <- create_violin_panel(
  data = df_raw_step3,
  moderator = "Education_Stage",
  title = "Education Level",
  panel_label = "A"
)

cat("✓ Panel A created: Education Level\n")


# ────────────────────────────────────────────────────────────────────────
# 3.3: Panel B — English Major Status
# ────────────────────────────────────────────────────────────────────────

# Panel B: effect sizes grouped by the English_Major column
p3b <- create_violin_panel(
  data = df_raw_step3,
  moderator = "English_Major",
  title = "English Major Status",
  panel_label = "B"
)

cat("✓ Panel B created: English Major Status\n")


# ────────────────────────────────────────────────────────────────────────
# 3.4: Panel C — First Language (L1) Forest Plot
# ────────────────────────────────────────────────────────────────────────

# L1 coefficients from the multivariate model, largest estimate first
df_l1 <- df_coef %>%
  filter(Moderator_Family == "L1") %>%
  arrange(desc(Estimate)) %>%
  mutate(y_pos = row_number())

# Fail early with a clear message instead of an opaque ggplot build error
if (nrow(df_l1) == 0) {
  stop(
    "No rows with Moderator_Family == \"L1\" in the coefficient table",
    call. = FALSE
  )
}

# 7-color gradient for languages
color_gradient_l1 <- c(
  "#2E7D32",  # Deep green
  "#66BB6A",  # Light green
  "#7B1FA2",  # Deep purple
  "#BA68C8",  # Light purple
  "#EF6C00",  # Deep orange
  "#FFB74D",  # Light orange
  "#1976D2"   # Blue
)

# Colours follow the estimate ranking; rows beyond the 7th reuse the last one
df_l1 <- df_l1 %>%
  mutate(
    color = color_gradient_l1[pmin(row_number(), length(color_gradient_l1))]
  )

# Per-language study count and summed N_Exp from the raw data
l1_metadata <- df_raw_step3 %>%
  filter(!is.na(L1)) %>%
  group_by(L1) %>%
  summarise(
    k_studies = n_distinct(Study_ID),
    n_total = sum(N_Exp, na.rm = TRUE),
    .groups = "drop"
  )

# Level_Label stays a character column (it is the join key). The axis uses a
# separate factor ordered by Estimate, and `axis_pos` is that factor's
# integer position on the discrete y scale.
df_l1 <- df_l1 %>%
  left_join(
    l1_metadata,
    by = c("Level_Label" = "L1")
  ) %>%
  mutate(
    Level_Axis = reorder(Level_Label, Estimate),
    axis_pos = as.integer(Level_Axis)
  )

# Background stripes behind every second row. Positions come from `axis_pos`
# so each stripe sits on its own row; previously they were derived from
# factor() applied to the filtered subset, which does not correspond to the
# axis positions. The fill uses the grid colour because the former fill was
# identical to the panel background and therefore invisible. The layer is
# skipped when there is nothing to stripe, since a zero-row layer combined
# with constant aesthetics fails at build time.
df_l1_stripes <- df_l1 %>% filter(y_pos %% 2 == 0)
l1_stripe_layer <- if (nrow(df_l1_stripes) > 0) {
  geom_rect(
    data = df_l1_stripes,
    aes(xmin = -Inf, xmax = Inf,
        ymin = axis_pos - 0.48,
        ymax = axis_pos + 0.48),
    fill = COLORS_FIG3$grid,
    alpha = 0.35,
    inherit.aes = FALSE
  )
} else {
  NULL
}

# Create forest plot. Every layer maps the same ordered factor to y: mixing
# it with the plain character column lets whichever layer is trained first
# decide the axis order, so the Estimate ordering was not guaranteed.
p3c <- ggplot(df_l1, aes(x = Estimate, y = Level_Axis)) +

  # Alternating backgrounds
  l1_stripe_layer +

  # Null reference
  annotate(
    "rect",
    xmin = -0.03,
    xmax = 0.03,
    ymin = -Inf,
    ymax = Inf,
    fill = COLORS_FIG3$grid,
    alpha = 0.5
  ) +
  geom_vline(
    xintercept = 0,
    color = COLORS_FIG3$neutral,
    linewidth = 1.2,
    alpha = 0.5
  ) +

  # CIs - Triple layer (wide faint halo, medium glow, narrow core)
  geom_segment(
    aes(x = CI_Lower, xend = CI_Upper,
        y = Level_Axis, yend = Level_Axis,
        color = color),
    linewidth = 6,
    alpha = 0.2,
    lineend = "round"
  ) +
  geom_segment(
    aes(x = CI_Lower, xend = CI_Upper,
        y = Level_Axis, yend = Level_Axis,
        color = color),
    linewidth = 3,
    alpha = 0.5,
    lineend = "round"
  ) +
  geom_segment(
    aes(x = CI_Lower, xend = CI_Upper,
        y = Level_Axis, yend = Level_Axis,
        color = color),
    linewidth = 1.8,
    alpha = 0.85,
    lineend = "round"
  ) +

  # Point estimates (diamond)
  geom_point(
    aes(fill = color),
    shape = 23,
    size = 7,
    color = "white",
    stroke = 1.8,
    alpha = 0.95
  ) +

  # Metadata annotations, right-aligned in a column left of the widest CI
  geom_text(
    aes(
      x = min(CI_Lower) - 0.15,
      label = sprintf("k=%d  n=%s", k_studies, scales::comma(n_total))
    ),
    hjust = 1,
    size = 3.8,
    fontface = "italic",
    color = COLORS_FIG3$neutral
  ) +

  # Coefficient values, placed just past the upper CI limit
  geom_label(
    aes(
      x = CI_Upper + abs(CI_Upper - CI_Lower) * 0.15,
      label = sprintf("%+.3f", Estimate),
      fill = color
    ),
    hjust = 0,
    size = 4,
    fontface = "bold",
    color = "white",
    label.size = 0,
    label.padding = unit(0.3, "lines")
  ) +

  # Scales
  scale_color_identity() +
  scale_fill_identity() +
  scale_x_continuous(
    name = "β Coefficient (Multivariate Model)",
    expand = expansion(mult = c(0.15, 0.15))
  ) +
  scale_y_discrete(name = NULL) +

  # Labels
  labs(title = "C. First Language (L1)") +

  # Theme
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(
      size = 19,
      face = "bold",
      color = COLORS_FIG3$neutral,
      margin = margin(b = 15)
    ),
    axis.title.x = element_text(
      size = 16,
      face = "bold",
      color = COLORS_FIG3$neutral,
      margin = margin(t = 10)
    ),
    axis.text.x = element_text(
      size = 14,
      color = COLORS_FIG3$neutral
    ),
    axis.text.y = element_text(
      size = 15,
      face = "bold",
      color = COLORS_FIG3$neutral
    ),
    panel.background = element_rect(fill = COLORS_FIG3$bg, color = NA),
    panel.grid.major.x = element_line(
      color = COLORS_FIG3$grid,
      linewidth = 0.3,
      linetype = "dotted"
    ),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    plot.background = element_rect(fill = "white", color = NA),
    plot.margin = margin(15, 15, 15, 15)
  )

cat("✓ Panel C created: L1 Background\n")


# ────────────────────────────────────────────────────────────────────────
# 3.5: Panel D — Treatment Duration
# ────────────────────────────────────────────────────────────────────────

# Panel D: effect sizes grouped by the Treatment_Duration column
p3d <- create_violin_panel(
  data = df_raw_step3,
  moderator = "Treatment_Duration",
  title = "Treatment Duration",
  panel_label = "D"
)

cat("✓ Panel D created: Treatment Duration\n")


# ────────────────────────────────────────────────────────────────────────
# 3.6: Combine & Export
# ────────────────────────────────────────────────────────────────────────

# Assemble the 2 x 2 dashboard (A | B over C | D) and add the overall title
fig3 <- ((p3a + p3b) / (p3c + p3d)) +
  patchwork::plot_annotation(
    title = "Multivariate Meta-Regression Analysis: Key Moderating Factors",
    theme = ggplot2::theme(
      plot.title = ggplot2::element_text(
        size = 24,
        face = "bold",
        color = COLORS_FIG3$neutral,
        hjust = 0.5,
        margin = ggplot2::margin(b = 20)
      )
    )
  )

# Write the figure through the notebook's save helper; the path is passed
# without a file extension
save_publication_plot(
  fig3,
  "Data_Visualization/Step3_Multivariate_Panels/Figure3_Multivariate_Meta_Regression",
  width = 24,
  height = 16,
  dpi = 600
)

# Render in the notebook output
print(fig3)

cat("\n" %+% strrep("=", 80) %+% "\n")
cat("✓ STEP 3 COMPLETE: Multivariate meta-regression dashboard exported\n")
cat(strrep("=", 80) %+% "\n\n")

---

## STEP 4: Publication Bias & Robustness Diagnostics

**Input Files:**
- `Meta_ready_cleaned.csv` (Raw effect sizes)
- `leave_one_out_analysis.csv` (LOO sensitivity metrics)
- `publication_bias_tests.csv` (Egger test results)

**Output:** `Figure4_Publication_Bias_and_Robustness.png` / `.pdf`

**Design Specifications:**
1. **Panel A:** Funnel plot with enhanced precision contours + Egger test annotation
2. **Panel B:** Leave-One-Out sensitivity analysis with confidence bands

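**Color Harmony:** Blue-Crimson-Emerald-Amber system for diagnostic clarity
- Significant effects: Blue gradient (#0D47A1 → #42A5F5), darker for larger effect sizes
- Non-significant effects: Crimson gradient (#B71C1C → #E57373), darker for larger effect sizes
- Confidence regions: Emerald (#1B5E20, #2E7D32); influential studies in Panel B: Amber (#FF6F00)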

---

In [None]:
# ============================================================================
# STEP 4: PUBLICATION BIAS & ROBUSTNESS DIAGNOSTICS
# ============================================================================

cat("\n" %+% strrep("=", 80) %+% "\n")
cat("STEP 4: PUBLICATION BIAS & ROBUSTNESS DIAGNOSTICS\n")
cat(strrep("=", 80) %+% "\n")

# ────────────────────────────────────────────────────────────────────────
# 4.1: Data Loading
# ────────────────────────────────────────────────────────────────────────

# Study-level effect sizes
df_raw_step4 <- readr::read_csv(
  file = "Meta_ready_cleaned.csv",
  show_col_types = FALSE
)

# Leave-one-out sensitivity results
df_loo <- readr::read_csv(
  file = "Meta_Analysis_Results/Step4_Sensitivity_Analysis/leave_one_out_analysis.csv",
  show_col_types = FALSE
)

# Publication-bias test results
pub_bias_results <- readr::read_csv(
  file = "Meta_Analysis_Results/Step4_Sensitivity_Analysis/publication_bias_tests.csv",
  show_col_types = FALSE
)

# Figure 4 palette: the blue and crimson ramps each run dark -> light over
# six steps; emerald and amber have four steps each; the remaining entries
# are the text colour, panel background and grid-line colour.
COLORS_FIG4 <- list(
  sig_blue = c(
    "#0D47A1", "#1565C0", "#1976D2", "#1E88E5", "#2196F3", "#42A5F5"
  ),
  nonsig_crimson = c(
    "#B71C1C", "#C62828", "#D32F2F", "#E53935", "#EF5350", "#E57373"
  ),
  emerald = c("#1B5E20", "#2E7D32", "#388E3C", "#43A047"),
  amber   = c("#FF6F00", "#FF8F00", "#FFA000", "#FFB300"),
  neutral = "#263238",
  bg      = "#FAFAFA",
  grid    = "#E0E0E0"
)

cat("✓ Data loaded and color palette defined\n")


# ────────────────────────────────────────────────────────────────────────
# 4.2: Panel A — Funnel Plot with Enhanced Precision Contours
# ────────────────────────────────────────────────────────────────────────

# Prepare funnel data.
# Effect sizes are binned into six tiers (1 = g >= 1.5 ... 6 = g < 0.2, with
# a missing g falling into tier 6). One tier index drives both the colour
# shade and the point size so the two can never drift apart.
effect_cuts <- c(0.2, 0.5, 0.8, 1.0, 1.5)

df_funnel <- df_raw_step4 %>%
  mutate(
    SE = SE_Hedges_g,
    precision = 1 / SE,
    significant = `p-value` < 0.05,
    effect_tier = if_else(
      is.na(Hedges_g),
      6L,
      6L - findInterval(Hedges_g, effect_cuts)
    ),
    # Blue ramp for significant effects, crimson otherwise; darker = larger g
    color_cat = if_else(
      significant,
      COLORS_FIG4$sig_blue[effect_tier],
      COLORS_FIG4$nonsig_crimson[effect_tier]
    ),
    # Tier 1 -> size 7 ... tier 6 -> size 2
    point_size = as.numeric(8 - effect_tier)
  )

# Inverse-variance weighted mean, used as the funnel centre and reused by the
# leave-one-out panel. Rows with a missing effect size or a missing/zero SE
# are excluded here: a single such row would otherwise turn the pooled value
# into NA/NaN and blank out every element positioned from it.
has_weight <- is.finite(df_funnel$Hedges_g) &
  is.finite(df_funnel$SE) &
  df_funnel$SE > 0
pooled_g <- weighted.mean(
  df_funnel$Hedges_g[has_weight],
  1 / df_funnel$SE[has_weight]^2
)

# Extract Egger test results
egger_result <- pub_bias_results %>%
  filter(Test == "Egger's Test") %>%
  slice(1)

egger_z <- egger_result$`Z-value`
egger_p <- egger_result$`P-value`

# Annotation text. Falls back to a neutral label when the results file has
# no matching row or the statistics are missing, instead of failing while
# building the plot. p-values below 0.001 are reported as "< 0.001".
egger_available <- length(egger_z) == 1 && length(egger_p) == 1 &&
  !is.na(egger_z) && !is.na(egger_p)

egger_label <- if (!egger_available) {
  "<b>Egger's Test</b><br>not available"
} else if (egger_p < 0.001) {
  sprintf("<b>Egger's Test</b><br>Z = %.3f, <i>p</i> %s %.3f", egger_z, "<", 0.001)
} else {
  sprintf("<b>Egger's Test</b><br>Z = %.3f, <i>p</i> %s %.3f", egger_z, "=", egger_p)
}

# Create confidence contours
max_se <- max(df_funnel$SE, na.rm = TRUE) * 1.05
se_range <- seq(0.001, max_se, length.out = 100)

# 95% CI contours
ci_contour_upper <- pooled_g + 1.96 * se_range
ci_contour_lower <- pooled_g - 1.96 * se_range

# 99% CI contours
ci_99_upper <- pooled_g + 2.576 * se_range
ci_99_lower <- pooled_g - 2.576 * se_range

# Closed polygons: walk up the upper boundary, then back along the lower one
contour_95 <- tibble(
  SE = c(se_range, rev(se_range)),
  Hedges_g = c(ci_contour_upper, rev(ci_contour_lower))
)

contour_99 <- tibble(
  SE = c(se_range, rev(se_range)),
  Hedges_g = c(ci_99_upper, rev(ci_99_lower))
)

# Create funnel plot
p4a <- ggplot() +

  # 99% contour (lightest)
  geom_polygon(
    data = contour_99,
    aes(x = Hedges_g, y = SE),
    fill = COLORS_FIG4$emerald[1],
    alpha = 0.08
  ) +

  # 95% contour (medium)
  geom_polygon(
    data = contour_95,
    aes(x = Hedges_g, y = SE),
    fill = COLORS_FIG4$emerald[2],
    alpha = 0.15
  ) +

  # Pooled estimate reference
  geom_vline(
    xintercept = pooled_g,
    color = COLORS_FIG4$neutral,
    linewidth = 1.5,
    linetype = "dashed",
    alpha = 0.6
  ) +

  # Effect size points (triple-layer: faint halo, mid glow, solid core)
  geom_point(
    data = df_funnel,
    aes(x = Hedges_g, y = SE, color = color_cat, size = point_size),
    alpha = 0.15,
    shape = 16
  ) +
  geom_point(
    data = df_funnel,
    aes(x = Hedges_g, y = SE, color = color_cat, size = point_size * 0.7),
    alpha = 0.45,
    shape = 16
  ) +
  geom_point(
    data = df_funnel,
    aes(x = Hedges_g, y = SE, color = color_cat, size = point_size * 0.4),
    alpha = 0.85,
    shape = 16,
    stroke = 0.8
  ) +

  # Egger test annotation, anchored near the smallest observed effect size
  annotate(
    "richtext",
    x = min(df_funnel$Hedges_g, na.rm = TRUE) + 0.1,
    y = max_se * 0.95,
    label = egger_label,
    hjust = 0,
    vjust = 1,
    size = 5,
    fill = "white",
    color = COLORS_FIG4$neutral,
    label.color = COLORS_FIG4$emerald[3],
    label.size = 1.5,
    label.padding = unit(0.5, "lines"),
    fontface = "bold"
  ) +

  # Scales (colours and sizes are taken literally from the data)
  scale_color_identity() +
  scale_size_identity() +
  scale_x_continuous(
    name = "Effect Size (Hedges' g)",
    expand = expansion(mult = c(0.1, 0.1))
  ) +
  # Reversed so the smallest standard errors sit at the top of the funnel
  scale_y_reverse(
    name = "Standard Error",
    expand = expansion(mult = c(0.05, 0.05))
  ) +

  # Labels
  labs(title = "A. Funnel Plot: Publication Bias Assessment") +

  # Theme
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(
      size = 19,
      face = "bold",
      color = COLORS_FIG4$neutral,
      margin = margin(b = 15)
    ),
    axis.title = element_text(
      size = 16,
      face = "bold",
      color = COLORS_FIG4$neutral
    ),
    axis.text = element_text(
      size = 14,
      color = COLORS_FIG4$neutral
    ),
    panel.background = element_rect(fill = COLORS_FIG4$bg, color = NA),
    panel.grid.major = element_line(
      color = COLORS_FIG4$grid,
      linewidth = 0.3,
      linetype = "dotted"
    ),
    panel.grid.minor = element_blank(),
    plot.background = element_rect(fill = "white", color = NA),
    plot.margin = margin(15, 15, 15, 15)
  )

cat("✓ Panel A created: Funnel plot\n")


# ────────────────────────────────────────────────────────────────────────
# 4.3: Panel B — Leave-One-Out Sensitivity Analysis
# ────────────────────────────────────────────────────────────────────────

# Prepare LOO data.
# A study is flagged influential when removing it moves the estimate more
# than 0.1 away from `pooled_g` (computed in the funnel-plot section).
# Excluded_Study stays a character column; the axis uses a separate factor
# ordered by LOO_Estimate, and `axis_pos` is that factor's integer position
# on the discrete y scale.
# NOTE(review): `pooled_g` is an inverse-variance weighted mean; confirm it is
# the right reference for the model that produced the LOO estimates.
df_loo_plot <- df_loo %>%
  arrange(LOO_Estimate) %>%
  mutate(
    y_pos = row_number(),
    influential = abs(LOO_Estimate - pooled_g) > 0.1,
    color = if_else(influential, COLORS_FIG4$amber[1], COLORS_FIG4$sig_blue[3]),
    Study_Axis = reorder(Excluded_Study, LOO_Estimate),
    axis_pos = as.integer(Study_Axis)
  )

# Band spanning the lowest lower limit to the highest upper limit of all
# leave-one-out confidence intervals
overall_ci_lower <- min(df_loo_plot$LOO_CI_Lower, na.rm = TRUE)
overall_ci_upper <- max(df_loo_plot$LOO_CI_Upper, na.rm = TRUE)

# Background stripes behind every second row. Positions come from `axis_pos`
# so each stripe sits on its own row; previously they were derived from
# factor() applied to the filtered subset, which does not correspond to the
# axis positions. The fill uses the grid colour because the former fill was
# identical to the panel background and therefore invisible. The layer is
# skipped when empty, since a zero-row layer combined with constant
# aesthetics fails at build time.
df_loo_stripes <- df_loo_plot %>% filter(y_pos %% 2 == 0)
loo_stripe_layer <- if (nrow(df_loo_stripes) > 0) {
  geom_rect(
    data = df_loo_stripes,
    aes(
      xmin = -Inf,
      xmax = Inf,
      ymin = axis_pos - 0.48,
      ymax = axis_pos + 0.48
    ),
    fill = COLORS_FIG4$grid,
    alpha = 0.35,
    inherit.aes = FALSE
  )
} else {
  NULL
}

# Warning markers for influential studies. Built only when at least one
# study is flagged: with none, the layer would have zero rows plus constant
# aesthetics and the whole figure would fail to build.
df_loo_influential <- df_loo_plot %>% filter(influential)
loo_marker_layer <- if (nrow(df_loo_influential) > 0) {
  geom_text(
    data = df_loo_influential,
    aes(
      x = min(LOO_CI_Lower) - 0.05,
      label = "⚠"
    ),
    hjust = 1,
    size = 5,
    color = COLORS_FIG4$amber[1]
  )
} else {
  NULL
}

# Create LOO plot. Every layer maps the same ordered factor to y: mixing it
# with the plain character column lets whichever layer is trained first
# decide the axis order, so the estimate ordering was not guaranteed.
p4b <- ggplot(df_loo_plot, aes(x = LOO_Estimate, y = Study_Axis)) +

  # Alternating backgrounds
  loo_stripe_layer +

  # Overall confidence band
  annotate(
    "rect",
    xmin = overall_ci_lower,
    xmax = overall_ci_upper,
    ymin = -Inf,
    ymax = Inf,
    fill = COLORS_FIG4$emerald[1],
    alpha = 0.12
  ) +

  # Pooled estimate reference
  geom_vline(
    xintercept = pooled_g,
    color = COLORS_FIG4$neutral,
    linewidth = 1.5,
    linetype = "dashed",
    alpha = 0.6
  ) +

  # CIs (triple-layer: wide faint halo, medium glow, narrow core)
  geom_segment(
    aes(
      x = LOO_CI_Lower,
      xend = LOO_CI_Upper,
      y = Study_Axis,
      yend = Study_Axis,
      color = color
    ),
    linewidth = 5,
    alpha = 0.2,
    lineend = "round"
  ) +
  geom_segment(
    aes(
      x = LOO_CI_Lower,
      xend = LOO_CI_Upper,
      y = Study_Axis,
      yend = Study_Axis,
      color = color
    ),
    linewidth = 2.5,
    alpha = 0.5,
    lineend = "round"
  ) +
  geom_segment(
    aes(
      x = LOO_CI_Lower,
      xend = LOO_CI_Upper,
      y = Study_Axis,
      yend = Study_Axis,
      color = color
    ),
    linewidth = 1.5,
    alpha = 0.85,
    lineend = "round"
  ) +

  # Point estimates (diamond for influential, circle for stable)
  geom_point(
    aes(fill = color, shape = influential),
    size = 6,
    color = "white",
    stroke = 1.5,
    alpha = 0.95
  ) +
  scale_shape_manual(
    values = c("FALSE" = 21, "TRUE" = 23),
    guide = "none"
  ) +

  # Estimate labels, placed just past the upper CI limit
  geom_label(
    aes(
      x = LOO_CI_Upper + abs(LOO_CI_Upper - LOO_CI_Lower) * 0.1,
      label = sprintf("%.3f", LOO_Estimate),
      fill = color
    ),
    hjust = 0,
    size = 3.5,
    fontface = "bold",
    color = "white",
    label.size = 0,
    label.padding = unit(0.25, "lines")
  ) +

  # Influential study markers
  loo_marker_layer +

  # Scales
  scale_color_identity() +
  scale_fill_identity() +
  scale_x_continuous(
    name = "Effect Size (Hedges' g) with Study Excluded",
    expand = expansion(mult = c(0.12, 0.12))
  ) +
  scale_y_discrete(name = NULL) +

  # Labels
  labs(title = "B. Leave-One-Out Sensitivity Analysis") +

  # Theme
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(
      size = 19,
      face = "bold",
      color = COLORS_FIG4$neutral,
      margin = margin(b = 15)
    ),
    axis.title.x = element_text(
      size = 16,
      face = "bold",
      color = COLORS_FIG4$neutral,
      margin = margin(t = 10)
    ),
    axis.text.x = element_text(
      size = 14,
      color = COLORS_FIG4$neutral
    ),
    axis.text.y = element_text(
      size = 12,
      color = COLORS_FIG4$neutral,
      face = "italic"
    ),
    panel.background = element_rect(fill = COLORS_FIG4$bg, color = NA),
    panel.grid.major.x = element_line(
      color = COLORS_FIG4$grid,
      linewidth = 0.3,
      linetype = "dotted"
    ),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    plot.background = element_rect(fill = "white", color = NA),
    plot.margin = margin(15, 15, 15, 15)
  )

cat("✓ Panel B created: LOO sensitivity\n")


# ────────────────────────────────────────────────────────────────────────
# 4.4: Combine & Export
# ────────────────────────────────────────────────────────────────────────

# Place the funnel plot and the leave-one-out plot side by side and add the
# overall title
fig4 <- (p4a | p4b) +
  patchwork::plot_annotation(
    title = "Publication Bias & Robustness Diagnostics",
    theme = ggplot2::theme(
      plot.title = ggplot2::element_text(
        size = 24,
        face = "bold",
        color = COLORS_FIG4$neutral,
        hjust = 0.5,
        margin = ggplot2::margin(b = 20)
      )
    )
  )

# Write the figure through the notebook's save helper; the path is passed
# without a file extension
save_publication_plot(
  fig4,
  "Data_Visualization/Step4_Publication_Bias/Figure4_Publication_Bias_and_Robustness",
  width = 26,
  height = 12,
  dpi = 600
)

# Render in the notebook output
print(fig4)

cat("\n" %+% strrep("=", 80) %+% "\n")
cat("✓ STEP 4 COMPLETE: Publication bias & robustness diagnostics exported\n")
cat(strrep("=", 80) %+% "\n\n")