In [None]:
# Install pbkrtest only when it is missing, so that re-running this cell does
# not download and reinstall the package every time.
if (!requireNamespace("pbkrtest", quietly = TRUE)) {
  install.packages("pbkrtest")
}

In [None]:
library(lme4)
library(tidyverse)
library(pbkrtest)

In [None]:
# The R runtime has no built-in Drive auth function, so the googledrive
# package is used instead. Install it only when it is missing so that
# re-running this cell stays cheap.
if (!requireNamespace("googledrive", quietly = TRUE)) {
  install.packages("googledrive")
}
library(googledrive)

# Authorise googledrive with the out-of-band flow (use_oob = TRUE).
# This grants API access to Drive; it does not mount Drive as a file system.
drive_auth(use_oob = TRUE)

In [None]:


# Stack the data frames held in `all_data` into a single data frame.
# NOTE(review): `all_data` is not created in this cell; it must already exist
# in the session when this cell runs.
merged_df <- do.call(rbind, all_data)

# Reset row names so the combined rows are numbered consecutively.
rownames(merged_df) <- NULL

# Outlet is the part of Filename that precedes the first "--" separator.
# vapply() (rather than sapply()) guarantees a character vector, even when
# merged_df has zero rows.
merged_df$Outlet <- vapply(
  strsplit(merged_df$Filename, "--", fixed = TRUE),
  `[`,
  character(1),
  1
)


In [None]:
# Show the distinct outlet names present in the merged data.
unique(merged_df[["Outlet"]])

In [None]:
# Show the distinct values of the label column in the merged data.
unique(merged_df[["label"]])

Test the relationships among the political labels

In [None]:
library(lme4)
library(lmerTest) # for ANOVA with mixed models
library(emmeans) # for post hoc tests

# Set the emmeans `lmerTest.limit` option used by the post hoc tests below.
emm_options(lmerTest.limit = 61169)

columns_to_plot <- c('care.virtue', 'fairness.virtue', 'loyalty.virtue',
                     'authority.virtue', 'sanctity.virtue',
                     'care.vice', 'fairness.vice', 'loyalty.vice',
                     'authority.vice', 'sanctity.vice')

political_labels <- c("left", "left_center", "center", "right_center", "right")

# The label filter does not depend on the score column, so build the subset
# once instead of on every loop iteration.
filtered_data <- merged_df[merged_df$label %in% political_labels, ]

for (column in columns_to_plot) {
  cat("Analyzing:", column, "\n")

  # Mixed model: fixed effect of label, random intercept per Outlet.
  formula <- as.formula(paste(column, "~ label + (1 | Outlet)"))
  model <- lmer(formula, data = filtered_data)

  anova_result <- anova(model)
  print(anova_result)

  # p-value of the first row of the ANOVA table. It can be missing or NA, and
  # a bare `if (p < 0.05)` would then abort the whole loop, so check first.
  p_value <- anova_result[["Pr(>F)"]][1]
  if (is.null(p_value) || is.na(p_value)) {
    message("No p-value available for ", column, "; skipping post hoc tests.")
  } else if (p_value < 0.05) {
    # Perform post hoc tests if the overall test is significant
    post_hoc <- emmeans(model, pairwise ~ label)
    print(post_hoc)
  }

  cat("\n") # Newline for readability
}

In [None]:
# Set the emmeans `pbkrtest.limit` option used by the post hoc tests below.
emm_options(pbkrtest.limit = 4461)

years <- c(2019, 2020, 2021, 2022)

for (year in years) {
  cat("Year:", year, "\n")

  # Rows for the current year, restricted to the political labels.
  year_data <- merged_df[merged_df$Year == year & merged_df$label %in% political_labels, ]

  for (column in columns_to_plot) {
    cat("Analyzing:", column, "\n")

    # Fit the mixed-effects model: fixed effect of label, random intercept
    # per Outlet.
    formula <- as.formula(paste(column, "~ label + (1 | Outlet)"))
    model <- lmer(formula, data = year_data)

    anova_result <- anova(model)
    print(anova_result)

    # p-value of the first row of the ANOVA table. It can be missing or NA,
    # and a bare `if (p < 0.05)` would then abort both loops, so check first.
    p_value <- anova_result[["Pr(>F)"]][1]
    if (is.null(p_value) || is.na(p_value)) {
      message("No p-value available for ", column, " in ", year,
              "; skipping post hoc tests.")
    } else if (p_value < 0.05) {
      # Pairwise comparisons only when the overall test is significant.
      post_hoc <- emmeans(model, pairwise ~ label)
      print(post_hoc)
    }

    cat("\n")
  }
  cat("\n")
}

2020, 2021 - the ANOVA tests show no significant differences.
2019, 2022 - the ANOVA tests are frequently significant, with the differences involving 'center' and the other labels.

In [None]:
# Set the emmeans `pbkrtest.limit` option used by the post hoc tests below.
emm_options(pbkrtest.limit = 43137)
columns_to_plot <- c('care.virtue', 'fairness.virtue', 'loyalty.virtue',
                     'authority.virtue', 'sanctity.virtue',
                     'care.vice', 'fairness.vice', 'loyalty.vice',
                     'authority.vice', 'sanctity.vice')

# Political labels plus the additional source categories.
labels_to_examine <- c("left", "left_center", "center", "right_center", "right",
                       "conspiracy_pseudoscience", "questionable_source",
                       "satire", "pro-science")

# The label filter does not depend on the score column, so build the subset
# once instead of on every loop iteration.
filtered_data <- merged_df[merged_df$label %in% labels_to_examine, ]

for (column in columns_to_plot) {
  cat("Analyzing:", column, "\n")

  # Mixed model: fixed effect of label, random intercept per Outlet.
  formula <- as.formula(paste(column, "~ label + (1 | Outlet)"))
  model <- lmer(formula, data = filtered_data)

  anova_result <- anova(model)
  print(anova_result)

  # p-value of the first row of the ANOVA table. It can be missing or NA, and
  # a bare `if (p < 0.05)` would then abort the whole loop, so check first.
  p_value <- anova_result[["Pr(>F)"]][1]
  if (is.null(p_value) || is.na(p_value)) {
    message("No p-value available for ", column, "; skipping post hoc tests.")
  } else if (p_value < 0.05) {
    # Pairwise comparisons only when the overall test is significant.
    post_hoc <- emmeans(model, pairwise ~ label)
    print(post_hoc)
  }

  cat("\n")
}

The conspiracy_pseudoscience label is generally higher on all of the virtue scores.

In [None]:
# Virtue-minus-vice difference score for each category.
merged_df$care_diff <- merged_df$care.virtue - merged_df$care.vice
merged_df$loyalty_diff <- merged_df$loyalty.virtue - merged_df$loyalty.vice
merged_df$fairness_diff <- merged_df$fairness.virtue - merged_df$fairness.vice
merged_df$authority_diff <- merged_df$authority.virtue - merged_df$authority.vice
merged_df$sanctity_diff <- merged_df$sanctity.virtue - merged_df$sanctity.vice

labels_to_examine <- c("left", "left_center", "center", "right_center", "right",
                       "conspiracy_pseudoscience", "questionable_source",
                       "satire", "pro-science")
diff_scores <- c('care_diff', 'loyalty_diff', 'fairness_diff', 'authority_diff', 'sanctity_diff')

# The label filter does not depend on the difference score, so build the
# subset once (after the *_diff columns exist) instead of on every iteration.
filtered_data <- merged_df[merged_df$label %in% labels_to_examine, ]

for (diff_score in diff_scores) {
  cat("Analyzing:", diff_score, "\n")

  # Mixed model: fixed effect of label, random intercept per Outlet.
  model <- lmer(as.formula(paste(diff_score, "~ label + (1 | Outlet)")), data = filtered_data)

  anova_result <- anova(model)
  print(anova_result)

  # p-value of the first row of the ANOVA table. It can be missing or NA, and
  # a bare `if (p < 0.05)` would then abort the whole loop, so check first.
  p_value <- anova_result[["Pr(>F)"]][1]
  if (is.null(p_value) || is.na(p_value)) {
    message("No p-value available for ", diff_score, "; skipping post hoc tests.")
  } else if (p_value < 0.05) {
    # Pairwise comparisons only when the overall test is significant.
    post_hoc <- emmeans(model, pairwise ~ label)
    print(post_hoc)
  }

  cat("\n") # Newline for readability
}

In [None]:
library(lme4)
library(lmerTest) # For p-values in the model summary
library(ggplot2)
# install.packages("lmtest")
library(lmtest)

years <- c(2017, 2018, 2019, 2020, 2021, 2022)

# Preallocate one slot per year instead of growing the list inside the loop.
all_data <- vector("list", length(years))

# Read each year's CSV and tag every row with the year it came from.
for (i in seq_along(years)) {
  year <- years[i]

  # Files are named "<year>_doc_mf_scores.csv" in the working directory.
  df <- read.csv(paste0(year, "_doc_mf_scores.csv"))

  # Add a 'Year' column
  df$Year <- year

  all_data[[i]] <- df
}

# Stack the yearly data frames into one.
merged_df <- do.call(rbind, all_data)

# Reset row names so the combined rows are numbered consecutively.
rownames(merged_df) <- NULL

# Outlet is the part of Filename that precedes the first "--" separator.
# vapply() (rather than sapply()) guarantees a character vector, even when
# merged_df has zero rows.
merged_df$Outlet <- vapply(
  strsplit(merged_df$Filename, "--", fixed = TRUE),
  `[`,
  character(1),
  1
)

# Score columns to analyze: the five categories, all ".virtue" columns first,
# followed by all ".vice" columns.
foundations <- c("care", "fairness", "loyalty", "authority", "sanctity")
columns_to_plot <- c(paste0(foundations, ".virtue"),
                     paste0(foundations, ".vice"))

# --- Disabled alternative analysis (kept for reference) ----------------------
# Treats the label as a numeric 1-5 scale instead of a categorical factor.
# NOTE(review): this uses 'right-center' (hyphen) and the column `Doc_Label`,
# whereas the active cells filter `label` on 'right_center' (underscore);
# confirm the spelling and column name before re-enabling.

# # Convert Doc_Label to a numeric variable named Label_Numeric
# label_numeric <- c('left' = 1, 'left_center' = 2, 'center' = 3, 'right-center' = 4, 'right' = 5)
# # Convert labels of interest into numeric
# merged_df$Label_Numeric <- as.numeric(factor(merged_df$Doc_Label, levels = names(label_numeric)))

# # Filter to only keep rows with labels of interest
# df_filtered <- merged_df[!is.na(merged_df$Label_Numeric), ]


# # Analyze each column
# for (column in columns_to_plot) {
#   formula <- as.formula(paste(column, "~ Label_Numeric + (1|Outlet)"))
#   model <- lmer(formula, data = df_filtered)

#   # Print model summary
#   print(column)
#   print(summary(model))

# }

# For every year and every score column, fit a random-intercept model and
# print its full summary.
for (year in years) {
  cat("Year:", year, "\n")

  # Keep only the rows that belong to the current year.
  in_year <- merged_df$Year == year
  year_df <- merged_df[in_year, ]

  for (column in columns_to_plot) {
    # Response is the current score column; `label` is the fixed effect and
    # `Outlet` (extracted from `Filename`) is the random-intercept grouping.
    formula <- as.formula(sprintf("%s ~ label + (1 | Outlet)", column))

    model <- lmer(formula, data = year_df)

    model_summary <- summary(model)
    print(model_summary)
  }

  cat("\n")  # Blank line between years for readability
}

# Random-intercept model for the first score column.
# Fit by maximum likelihood (REML = FALSE): lmer() defaults to REML, and a
# REML criterion is not comparable with the ordinary likelihood of the lm()
# fit below, which would make the likelihood ratio test invalid.
full_model <- lmer(
  as.formula(paste(columns_to_plot[1], "~ label + (1 | Outlet)")),
  data = merged_df,
  REML = FALSE
)

# Reduced model without the random effect
reduced_model <- lm(as.formula(paste(columns_to_plot[1], "~ label")), data = merged_df)

# Likelihood ratio test for the Outlet random intercept.
# NOTE(review): the null hypothesis (zero outlet variance) lies on the boundary
# of the parameter space, so the chi-square p-value is conservative.
lr_test <- lrtest(reduced_model, full_model)

# Print the results of the likelihood ratio test
print(lr_test)

In [None]:
# Display the merged data frame as the cell output for visual inspection.
merged_df

In [None]:
# Print the summary of the stored model for each column name.
# NOTE(review): `columns_to_fit` and `models_list` are not defined in any
# visible cell; confirm they exist in the session before running this.
for (column in columns_to_fit) {
  fitted_model <- models_list[[column]]
  print(summary(fitted_model))
}

In [None]:
data <- read.csv("updated_doc_mf_scores.csv")

# Outlet is everything in Filename before the first "--" separator.
# The previous pattern "^[^\\-\\-]*" was a character class, so it stopped at
# the first single hyphen and truncated hyphenated outlet names. Removing
# "--" and everything after it keeps the full outlet name, and leaves the
# value unchanged when no "--" is present.
data$outlet <- sub("--.*$", "", data$Filename)

# Treat the document label as a factor with "center" as its reference level.
data$Doc_Label <- as.factor(data$Doc_Label)
data$Doc_Label <- relevel(data$Doc_Label, ref = "center")


In [None]:
# Score columns to analyze: the five categories, all ".virtue" columns first,
# followed by all ".vice" columns.
foundations <- c("care", "fairness", "loyalty", "authority", "sanctity")
columns_to_analyze <- c(paste0(foundations, ".virtue"),
                        paste0(foundations, ".vice"))

# Fit one mixed model per score column and print its summary.
for (column in columns_to_analyze) {
  # Fixed effect of Doc_Label with a random intercept per outlet.
  formula_string <- sprintf("%s ~ Doc_Label + (1|outlet)", column)
  formula_obj <- as.formula(formula_string)

  model <- lmer(formula_obj, data = data)

  cat("\n\nSummary for", column, ":\n")
  model_summary <- summary(model)
  print(model_summary)
}

For each of the virtues/vices, the variance of the outlet random intercept is non-zero, but it is very small and very similar to the residual variance. This shows that there is slight consistency among articles from the same outlet, but it is likely due to the labeling effect: the variance might be due to differences at the article level rather than at the outlet level.

At the label level, the t values are small in all cases, so no label differs significantly from the center label (the reference level) on any of the scores.