In [None]:
library(dplyr)

In [None]:
#' Simulate a survey table of gender and preferred milk brand.
#'
#' Both columns are drawn independently and uniformly at random with
#' replacement, so results differ between calls unless the caller fixes the
#' RNG state with `set.seed()`.
#'
#' @param n Number of rows to generate; a single finite number greater than 0.
#' @return A data.frame with two character columns: `性别` ("男" or "女") and
#'   `牛奶品牌` (one of five brand labels).
generate_table <- function(n) {
  # Validate type and length before the numeric comparison. The conditions
  # are checked in order, so bad input is reported by the check it actually
  # fails rather than by an error raised inside `&&` or a string comparison.
  stopifnot(is.numeric(n), length(n) == 1L, is.finite(n), n > 0)

  genders <- c("男", "女")
  brands <- c("光明", "蒙牛", "三元", "伊利", "其他")

  # Draw gender first, then brand, so seeded runs give the same table as
  # before this function was tidied up.
  gender <- sample(genders, size = n, replace = TRUE)
  milk_brand <- sample(brands, size = n, replace = TRUE)

  data.frame(
    性别 = gender,
    牛奶品牌 = milk_brand,
    stringsAsFactors = FALSE
  )
}

# Fix the RNG seed so the simulated sample, and every table and plot derived
# from it, is identical on each run of the notebook.
set.seed(2024)
sample_data <- generate_table(100)

In [None]:
# Display the simulated survey data.
sample_data

In [None]:
# Base-R frequency table for gender: one row per level, holding its count
# and its share of all observations.
gender_freq_base <- setNames(
  as.data.frame(table(sample_data$性别)),
  c("性别", "频数")
)
gender_freq_base[["比例"]] <- with(gender_freq_base, 频数 / sum(频数))

In [None]:
# Display the base-R gender frequency table.
gender_freq_base

In [None]:
library(dplyr)

# dplyr version of the gender frequency table: count each gender, then add
# that gender's share of the total.
gender_freq_tidy <- mutate(
  count(sample_data, 性别, name = "频数"),
  比例 = 频数 / sum(频数)
)

In [None]:
# Display the dplyr gender frequency table.
gender_freq_tidy

In [None]:
# Frequency table for milk brand: count each brand, then add that brand's
# share of the total.
brand_freq_tidy <- mutate(
  count(sample_data, 牛奶品牌, name = "频数"),
  比例 = 频数 / sum(频数)
)

In [None]:
# Display the brand frequency table.
brand_freq_tidy

In [None]:
library(dplyr)
library(tidyr)

# Gender x brand cross table in wide form: one row per gender, with a count
# column and a within-gender percentage column for every brand.
cross_tidy <- sample_data %>%
  group_by(性别, 牛奶品牌) %>%
  summarise(频数 = n(), .groups = "drop") %>%
  group_by(性别) %>%
  # Shares are formatted as text here, so the fill value below is text too.
  mutate(比例 = scales::percent(频数 / sum(频数), accuracy = 0.1)) %>%
  # Drop the grouping once the per-gender shares exist, so the result is a
  # plain tibble and later verbs are not silently applied per gender.
  ungroup() %>%
  pivot_wider(
    names_from = 牛奶品牌,
    values_from = c(频数, 比例),
    # Gender/brand combinations absent from the sample get an explicit zero.
    values_fill = list(频数 = 0, 比例 = "0.0%")
  )

cross_tidy

In [None]:
# NOTE(review): attaching Hmisc after dplyr is expected to mask
# dplyr::summarize() and dplyr::src(). The code in this file only calls
# summarise(), so it should be unaffected - confirm before using summarize().
library(Hmisc)

# Per-column summary of the simulated data (presumably Hmisc::describe()).
describe(sample_data)

In [None]:
library(ggplot2)

In [None]:
# Bar chart of respondents per gender, with the count printed above each bar.
gender_plot <- ggplot(sample_data, aes(x = 性别)) +
  # Map fill to the gender column and assign colours by name. A bare vector
  # passed as `fill =` is matched to the bars by position (its names are
  # ignored) and must contain exactly one entry per bar drawn.
  geom_bar(aes(fill = 性别), width = 0.6, show.legend = FALSE) +
  geom_text(
    aes(label = after_stat(count)),
    stat = "count",
    vjust = -0.5,
    size = 6
  ) +
  # Same gender palette as the stacked and clustered charts in this file.
  scale_fill_manual(values = c("男" = "#1f77b4", "女" = "#e377c2")) +
  labs(title = "性别分布", y = "人数") +
  theme_minimal(base_size = 18) +
  theme(plot.title = element_text(hjust = 0.5))

In [None]:
# Bar chart of respondents per milk brand, with the count above each bar.
brand_plot <- ggplot(sample_data, aes(x = 牛奶品牌)) +
  # Fill is mapped to brand so the Set2 palette adapts to however many brands
  # occur in the sample; a fixed five-colour vector fails when fewer appear.
  geom_bar(aes(fill = 牛奶品牌), width = 0.7, show.legend = FALSE) +
  geom_text(
    # after_stat() replaces the deprecated `..count..` notation.
    aes(label = after_stat(count)),
    stat = "count",
    vjust = -0.5,
    size = 6
  ) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "品牌分布", y = "人数") +
  theme_minimal(base_size = 18) +
  theme(axis.text.x = element_text(angle = 15, hjust = 1))

# Show the gender and brand charts side by side.
gridExtra::grid.arrange(gender_plot, brand_plot, ncol = 2)

In [None]:
# Build the combined data: stack the table twice, once with gender and once
# with brand renamed to a shared `category` column, tagged by `type`.
combo_data <- bind_rows(
  sample_data %>% mutate(type = "性别") %>% rename(category = 性别),
  sample_data %>% mutate(type = "品牌") %>% rename(category = 牛奶品牌)
)

# Faceted bar chart: one panel per `type`, each with its own x axis.
combo_plot <- ggplot(combo_data, aes(x = category)) +
  # Gender bars are coloured per gender; all brand bars share one colour.
  geom_bar(
    aes(fill = ifelse(type == "性别", category, "品牌")),
    show.legend = FALSE
  ) +
  geom_text(
    # after_stat() replaces the deprecated `..count..` notation.
    aes(label = after_stat(count)),
    stat = "count",
    vjust = -0.5,
    size = 4
  ) +
  scale_fill_manual(values = c(
    "男" = "#1f77b4", "女" = "#e377c2",
    "品牌" = RColorBrewer::brewer.pal(5, "Set2")[1]
  )) +
  facet_wrap(~type, scales = "free_x") +
  labs(x = "", y = "计数", title = "性别与品牌分布对比") +
  theme_bw(base_size = 14) +
  theme(
    strip.background = element_rect(fill = "white"),
    axis.text.x = element_text(angle = 15, hjust = 1)
  )

print(combo_plot)

In [None]:
# Stacked bar chart: gender composition within each milk brand.

# Count every brand/gender pair, then compute each gender's share of its brand.
stack_data_brand <- sample_data %>%
  group_by(牛奶品牌, 性别) %>%
  summarise(频数 = n(), .groups = "drop") %>%
  group_by(牛奶品牌) %>%
  mutate(百分比 = 频数 / sum(频数)) %>%
  # Drop the grouping so the stored table does not stay grouped by brand.
  ungroup()

stack_plot_brand <- ggplot(
  stack_data_brand,
  aes(x = 牛奶品牌, y = 频数, fill = 性别)
) +
  geom_col(
    position = position_stack(reverse = TRUE),
    width = 0.8,
    color = "white"
  ) +
  geom_text(
    # Label shows the count with the within-brand percentage underneath.
    aes(label = sprintf(
      "%d\n(%s)", 频数, scales::percent(百分比, accuracy = 0.1)
    )),
    # Must use the same reversed stacking as the bars so that each label
    # lands in the middle of its own segment.
    position = position_stack(vjust = 0.5, reverse = TRUE),
    color = "white",
    size = 6,
    lineheight = 0.8
  ) +
  scale_fill_manual(values = c("男" = "#1f77b4", "女" = "#e377c2")) +
  labs(
    title = "性别-品牌分布堆积条形图",
    x = "牛奶品牌",
    y = "人数",
    fill = "性别"
  ) +
  theme_minimal(base_size = 18) +
  theme(
    legend.position = "top",
    panel.grid.major.x = element_blank(),
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.text.x = element_text(angle = 15, hjust = 0.8)
  )

print(stack_plot_brand)

In [None]:
library(scales)

# Clustered bar chart: counts per brand, with one bar per gender.

# Count every gender/brand pair, then compute each brand's share within its
# gender.
cluster_data <- sample_data %>%
  group_by(性别, 牛奶品牌) %>%
  summarise(频数 = n(), .groups = "drop") %>%
  group_by(性别) %>%
  mutate(百分比 = 频数 / sum(频数)) %>%
  # Drop the grouping so code that reuses this table is not run per gender.
  ungroup()

cluster_plot1 <- ggplot(
  cluster_data,
  aes(x = 牛奶品牌, y = 频数, fill = 性别)
) +
  geom_col(
    position = position_dodge(width = 0.8),
    width = 0.7,
    color = "white"
  ) +
  # Same dodge width as the bars so each count sits above its own bar.
  geom_text(
    aes(label = 频数),
    position = position_dodge(0.8),
    vjust = -0.5,
    size = 5
  ) +
  scale_fill_manual(values = c("男" = "#1f77b4", "女" = "#e377c2")) +
  labs(
    title = "性别-品牌分布簇状条形图",
    x = "牛奶品牌",
    y = "人数"
  ) +
  theme_minimal(base_size = 18) +
  theme(
    legend.position = "top",
    panel.grid.major.x = element_blank(),
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.text.x = element_text(angle = 15, hjust = 0.8)
  )

print(cluster_plot1)

In [None]:
# Show the stacked and clustered charts side by side.
gridExtra::grid.arrange(stack_plot_brand, cluster_plot1, ncol = 2)

In [None]:
# Dodged bars of the within-gender brand shares: one cluster per gender and
# one bar per brand, each labelled with its percentage.
cluster_plot2 <- ggplot(
  data = cluster_data,
  mapping = aes(x = 性别, y = 百分比, fill = 牛奶品牌)
) +
  geom_col(width = 0.7, colour = "white", position = "dodge") +
  geom_text(
    mapping = aes(label = percent(百分比, accuracy = 0.1)),
    position = position_dodge(width = 0.7),
    size = 4,
    vjust = -0.5
  ) +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    y = "占比",
    x = "性别",
    title = "品牌选择百分比分布（按性别分组）"
  ) +
  theme_bw(base_size = 14) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "right"
  )

print(cluster_plot2)

In [None]:
# Diverging comparison chart: counts for "男" point in the positive
# direction, counts for every other gender value in the negative direction.
bidirectional_data <- mutate(
  cluster_data,
  方向值 = 频数 * ifelse(性别 == "男", 1L, -1L)
)

cluster_plot3 <- ggplot(
  data = bidirectional_data,
  mapping = aes(x = 牛奶品牌, y = 方向值, fill = 性别)
) +
  geom_col(width = 0.6, position = "dodge") +
  # Labels and axis ticks show magnitudes, hiding the sign used for direction.
  geom_text(
    mapping = aes(label = abs(方向值)),
    colour = "black",
    size = 4,
    position = position_dodge(width = 0.6)
  ) +
  scale_y_continuous(labels = abs) +
  scale_fill_manual(values = c("男" = "#4daf4a", "女" = "#984ea3")) +
  coord_flip() +
  labs(
    y = "购买人数",
    x = "牛奶品牌",
    title = "性别-品牌分布对比图"
  ) +
  theme_classic(base_size = 14) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.line.y = element_blank()
  )

print(cluster_plot3)

In [None]:
# Pie chart of brand shares.
# Per-brand counts and shares, sorted by descending count.
brand_data <- arrange(
  mutate(
    count(sample_data, 牛奶品牌, name = "频数"),
    百分比 = 频数 / sum(频数)
  ),
  desc(频数)
)

# A single stacked bar bent into a circle by the polar coordinate system.
ggplot(
  data = brand_data,
  mapping = aes(x = "", y = 百分比, fill = 牛奶品牌)
) +
  geom_col(colour = "white", width = 1) +
  geom_text(
    mapping = aes(
      label = paste0(牛奶品牌, "\n", scales::percent(百分比, accuracy = 0.1))
    ),
    size = 8,
    colour = "white",
    position = position_stack(vjust = 0.5)
  ) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "牛奶品牌消费比例") +
  theme_void(base_size = 22) +
  theme(
    legend.position = "right",
    plot.title = element_text(hjust = 0.5, face = "bold", size = 24)
  )