# 第二步：纯标准库（艰难方式）

- 禁用：pandas / statistics / numpy / 外部库
- 仍使用 **同一数据集与同一列**
- 自己实现均值/中位数/众数

In [None]:

import csv
import math
from collections import defaultdict
from pathlib import Path

csv_path = Path("data/dataset.csv")
numeric_column_name = "Count of Students"  # change to analyse another column


def parse_number(cell):
    """Convert one raw CSV cell to a finite float, or return None.

    Surrounding whitespace and "," thousands separators are removed before
    conversion. Blank cells, text that float() rejects, and the non-finite
    spellings float() accepts ("nan", "inf", ...) all yield None so the
    caller can skip them.
    """
    cleaned = cell.strip().replace(",", "")
    if not cleaned:
        return None
    try:
        number = float(cleaned)
    except ValueError:
        # Not a number (e.g. "n/a"); treated as a missing value.
        return None
    # A NaN would poison the mean, make sorted() order unreliable for the
    # median, and count as a separate key each time in the mode tally.
    if not math.isfinite(number):
        return None
    return number


def read_numeric_column(path, column_name):
    """Read every usable number from one named column of a CSV file.

    The first row is taken as the header and is used to locate
    ``column_name``. Rows too short to contain that column (including
    blank lines, which csv.reader yields as empty lists) and cells that
    parse_number() rejects are skipped.

    Returns:
        A list of floats in file order.

    Raises:
        ValueError: the file has no header row, or the header does not
            contain ``column_name``.
        OSError: the file cannot be opened.
    """
    numbers = []
    # "utf-8-sig" reads plain UTF-8 unchanged and drops a leading BOM, which
    # would otherwise be glued to the first header name.
    with open(path, "r", newline="", encoding="utf-8-sig") as handle:
        reader = csv.reader(handle)
        header = next(reader, None)
        if header is None:
            raise ValueError("{} has no header row".format(path))
        try:
            column_index = header.index(column_name)
        except ValueError:
            raise ValueError(
                "column {!r} not found; available columns: {}".format(
                    column_name, header
                )
            ) from None
        for row in reader:
            if column_index >= len(row):
                continue  # blank line or truncated row
            number = parse_number(row[column_index])
            if number is not None:
                numbers.append(number)
    return numbers


def compute_mean(numbers):
    """Return the arithmetic mean of ``numbers``, or NaN when it is empty."""
    if not numbers:
        return float("nan")
    total = 0.0
    for number in numbers:
        total += number
    return total / len(numbers)


def compute_median(numbers):
    """Return the median of ``numbers``, or NaN when it is empty.

    For an even count the result is the average of the two middle values.
    The input is not modified; a sorted copy is used.
    """
    ordered = sorted(numbers)
    count = len(ordered)
    if count == 0:
        return float("nan")
    middle = count // 2
    if count % 2 == 1:
        return ordered[middle]
    return (ordered[middle - 1] + ordered[middle]) / 2.0


def compute_modes(numbers):
    """Return every value tied for the highest frequency in ``numbers``.

    Values appear in order of first occurrence. An empty input gives an
    empty list.
    """
    freq = defaultdict(int)
    for number in numbers:
        freq[number] += 1
    if not freq:
        return []
    max_count = max(freq.values())
    return [value for value, count in freq.items() if count == max_count]


# Guarded so the helpers above can be imported and tested without the data
# file; in a notebook cell or a script run __name__ is "__main__", so the
# analysis below still executes and binds the same result names.
if __name__ == "__main__":
    values = read_numeric_column(csv_path, numeric_column_name)
    mean_val = compute_mean(values)
    median_val = compute_median(values)
    modes = compute_modes(values)

    print("Count:", len(values))
    print("Mean:", mean_val)
    print("Median:", median_val)
    print("Mode(s) (first 10):", modes[:10], "..." if len(modes) > 10 else "")
