In [None]:
import pandas as pd
import numpy as np

# Seed NumPy's legacy global generator so every run produces the same table.
np.random.seed(42)

# Simulated inventory of 16 trees: four plots (p1..p4) with four trees each.
# Dict values are evaluated top to bottom, so the tree ages are drawn from the
# shared random stream before the initial heights.
data = {
    'plot': [label for label in ('p1', 'p2', 'p3', 'p4') for _ in range(4)],
    'tree-age': np.random.randint(10, 20, size=16),  # integer ages in [10, 20)
    'sampleid': [tree_no + 1 for tree_no in range(16)],
    'Height0': np.random.uniform(2, 5, size=16),  # initial heights in [2, 5)
}

# Repeated measures: each later height is the previous measurement plus a
# fresh uniform increment in [0, 3), so a tree's height never decreases.
for earlier, later in (
    ('Height0', 'Height.after.3.years'),
    ('Height.after.3.years', 'Height.after.6.years'),
):
    data[later] = data[earlier] + np.random.uniform(0, 3, size=16)

# Assemble the columns into the final table.
df = pd.DataFrame(data)


# Write the table to data-eg.csv without the index column.
# NOTE(review): the destination is an absolute, machine-specific path, and the
# other cells visible in this notebook write to the very same file.
df.to_csv(
    path_or_buf="D:/大连理工大学/R语言书稿/Rforestat/R_code/chap4/data-eg.csv",
    index=False,
)

In [None]:
import pandas as pd
import numpy as np

# Seed NumPy's legacy global generator so the simulated data are repeatable.
np.random.seed(42)

# Sampling design.
n_per_plot = 4  # trees sampled in each plot
n_plots = 4     # number of plots
total_samples = n_per_plot * n_plots

# A different baseline height per plot gives a clearly visible plot effect.
plot_base_heights = {
    'p1': 2.0,
    'p2': 3.0,
    'p3': 4.0,
    'p4': 5.0
}

# Collect one (plot, tree_age, sampleid, Height0) tuple per tree. For every
# tree the age is drawn first and the height noise second, so the order of
# draws from the shared random stream is fixed.
rows = []
for plot, base_height in plot_base_heights.items():
    for _ in range(n_per_plot):
        # Wide age range (integers in [5, 25)) so age correlates with height.
        age = np.random.randint(5, 25)

        # Height = plot baseline + 0.2 per year of age + N(0, 0.5) noise.
        height = base_height + 0.2 * age + np.random.normal(0, 0.5)

        # Sample ids run 1..16 in insertion order.
        rows.append((plot, age, len(rows) + 1, height))

# Transpose the per-tree tuples into column lists.
column_names = ('plot', 'tree_age', 'sampleid', 'Height0')
data = {name: list(values) for name, values in zip(column_names, zip(*rows))}

# Build the table.
df = pd.DataFrame(data)

# Follow-up measurements: age-proportional growth plus N(0, 0.3) noise.
# The 3-year noise vector is drawn before the 6-year one.
growth_3y = df['tree_age'] * 0.1
df['Height.after.3.years'] = df['Height0'] + growth_3y + np.random.normal(0, 0.3, size=len(df))
growth_6y = df['tree_age'] * 0.15
df['Height.after.6.years'] = df['Height.after.3.years'] + growth_6y + np.random.normal(0, 0.3, size=len(df))

# Save the table as CSV without the index column.
# NOTE(review): the destination is an absolute, machine-specific path, and the
# other cells visible in this notebook write to the very same file.
df.to_csv(
    path_or_buf="D:/大连理工大学/R语言书稿/Rforestat/R_code/chap4/data-eg.csv",
    index=False,
)


In [16]:
# 树高主要由树龄决定

import pandas as pd
import numpy as np

# Simulate 16 trees (4 plots x 4 trees) whose initial height depends mainly on
# tree age, with only a slight plot effect.

# Seed NumPy's legacy global generator so the simulated data are repeatable.
np.random.seed(42)

n_per_plot = 4  # trees sampled in each plot

data = {
    'plot': ['p1'] * n_per_plot + ['p2'] * n_per_plot +
            ['p3'] * n_per_plot + ['p4'] * n_per_plot,
}
# Derive the ids from the number of rows instead of hard-coding 16, so that
# changing n_per_plot cannot produce columns of unequal length.
data['sampleid'] = list(range(1, len(data['plot']) + 1))

# Tree ages differ systematically between plots: for each plot, n_per_plot
# integers are drawn uniformly from [mean - 2, mean + 2].
age_means = [8, 12, 16, 20]  # mean tree age of p1, p2, p3, p4
data['tree_age'] = []
for mean in age_means:
    ages = np.random.randint(mean - 2, mean + 3, n_per_plot)
    data['tree_age'].extend(ages)

# Slight additive plot effect on height. Defined once here rather than being
# rebuilt on every loop iteration.
plot_effect = {'p1': 0.1, 'p2': -0.1, 'p3': 0.2, 'p4': -0.2}

# Tree heights: 2 + 0.3 per year of age, plus the plot effect, plus N(0, 0.5)
# noise. One noise value is drawn per tree, in row order.
data['Height0'] = []
for plot, age in zip(data['plot'], data['tree_age']):
    height = 2 + 0.3 * age
    height += plot_effect[plot]
    height += np.random.normal(0, 0.5)
    data['Height0'].append(height)

df = pd.DataFrame(data)
# Write the table to data-eg.csv without the index column.
# NOTE(review): the destination is an absolute, machine-specific path, and the
# other cells visible in this notebook write to the very same file.
df.to_csv(
    path_or_buf="D:/大连理工大学/R语言书稿/Rforestat/R_code/chap4/data-eg.csv",
    index=False,
)


In [None]:
import pandas as pd
import numpy as np

# Simulate 16 trees (4 plots x 4 trees) with a diameter (DBH) column and a
# height that depends on DBH plus a sizeable plot effect.

# Seed NumPy's legacy global generator so the simulated data are repeatable.
np.random.seed(42)

n_per_plot = 4  # trees sampled in each plot

data = {
    'plot': ['p1'] * n_per_plot + ['p2'] * n_per_plot +
            ['p3'] * n_per_plot + ['p4'] * n_per_plot,
}
# Derive the ids from the number of rows instead of hard-coding 16, so that
# changing n_per_plot cannot produce columns of unequal length.
data['sampleid'] = list(range(1, len(data['plot']) + 1))

# DBH values: for each plot, n_per_plot draws from N(mean, 2).
dbh_means = [15, 18, 21, 24]  # mean DBH of p1, p2, p3, p4
data['DBH'] = []
for mean in dbh_means:
    dbh = np.random.normal(mean, 2, n_per_plot)
    data['DBH'].extend(dbh)

# Additive plot effect on height. Defined once here rather than being rebuilt
# on every loop iteration.
plot_effect = {'p1': 1.5, 'p2': -1.5, 'p3': 2, 'p4': -2}

# Tree heights: 5 + 0.5 * DBH, plus the plot effect, plus N(0, 1.5) noise.
# One noise value is drawn per tree, in row order.
data['Height'] = []
for plot, tree_dbh in zip(data['plot'], data['DBH']):
    height = 5 + 0.5 * tree_dbh
    height += plot_effect[plot]
    height += np.random.normal(0, 1.5)
    data['Height'].append(height)

df = pd.DataFrame(data)
# Write the table to data-eg.csv without the index column.
# NOTE(review): the destination is an absolute, machine-specific path, and the
# other cells visible in this notebook write to the very same file.
df.to_csv(
    path_or_buf="D:/大连理工大学/R语言书稿/Rforestat/R_code/chap4/data-eg.csv",
    index=False,
)


In [17]:
import pandas as pd
import numpy as np

# Simulate 16 trees (4 plots x 4 trees) whose height is set mainly by the
# plot, with DBH added as a nuisance covariate.

# Seed NumPy's legacy global generator so the simulated data are repeatable.
np.random.seed(42)

n_per_plot = 4  # trees sampled in each plot

data = {
    'plot': ['p1'] * n_per_plot + ['p2'] * n_per_plot +
            ['p3'] * n_per_plot + ['p4'] * n_per_plot,
}
# Derive the row count from the plot column instead of hard-coding 16, so
# that changing n_per_plot cannot produce columns of unequal length.
n_trees = len(data['plot'])
data['sampleid'] = list(range(1, n_trees + 1))

# Baseline height of each plot.
plot_base_height = {'p1': 18, 'p2': 15, 'p3': 22, 'p4': 12}

# DBH: integers in [15, 35) drawn without regard to plot, which masks the
# plot effect.
data['DBH'] = np.random.randint(15, 35, size=n_trees)

# The overall mean DBH is the same for every tree, so compute it once instead
# of once per loop iteration.
mean_dbh = np.mean(data['DBH'])

# Tree heights: plot baseline + 0.3 * (DBH - mean DBH) + N(0, 2) noise.
# One noise value is drawn per tree, in row order.
data['Height'] = []
for plot, tree_dbh in zip(data['plot'], data['DBH']):
    # The baseline is determined by the plot; the centred DBH term is the
    # interfering covariate.
    height = plot_base_height[plot] + 0.3 * (tree_dbh - mean_dbh)
    height += np.random.normal(0, 2)
    data['Height'].append(height)

df = pd.DataFrame(data)
# Write the table to data-eg.csv without the index column.
# NOTE(review): the destination is an absolute, machine-specific path, and the
# other cells visible in this notebook write to the very same file.
df.to_csv(
    path_or_buf="D:/大连理工大学/R语言书稿/Rforestat/R_code/chap4/data-eg.csv",
    index=False,
)

# Print a header, then the per-plot mean and standard deviation of Height and DBH.
print("\n数据摘要:")
plot_stats = df.groupby('plot')[['Height', 'DBH']].agg(['mean', 'std'])
print(plot_stats)


数据摘要:
         Height              DBH          
           mean       std   mean       std
plot                                      
p1    17.937267  2.802717  27.25  5.560276
p2    14.144605  2.367710  25.25  5.439056
p3    20.672272  2.916360  20.50  3.696846
p4    10.017160  0.047614  19.50  4.725816


In [20]:
import pandas as pd
import numpy as np

# Simulate 16 trees (4 plots x 4 trees) in which DBH and the plot effect are
# confounded, followed by two repeated height measurements.

# Seed NumPy's legacy global generator so the simulated data are repeatable.
np.random.seed(42)

n_per_plot = 4  # trees sampled in each plot

data = {
    'plot': ['p1'] * n_per_plot + ['p2'] * n_per_plot +
            ['p3'] * n_per_plot + ['p4'] * n_per_plot,
}
# Derive the row count from the plot column instead of hard-coding 16, so
# that changing n_per_plot cannot produce columns of unequal length.
n_trees = len(data['plot'])
data['sampleid'] = list(range(1, n_trees + 1))

# Per-plot DBH distribution (creates the confounding): one N(mean, 2) draw
# per tree, in row order.
plot_dbh_means = {'p1': 30, 'p2': 20, 'p3': 15, 'p4': 25}
data['DBH'] = []
for p in data['plot']:
    data['DBH'].append(np.random.normal(plot_dbh_means[p], 2))

# The actual additive plot effects on height.
plot_effects = {'p1': 3, 'p2': -2, 'p3': -3, 'p4': 2}

# Initial heights: 10 + 0.5 * DBH + plot effect + N(0, 1) noise.
data['Height0'] = []
for plot, tree_dbh in zip(data['plot'], data['DBH']):
    dbh_effect = 0.5 * tree_dbh
    plot_effect = plot_effects[plot]
    height = 10 + dbh_effect + plot_effect + np.random.normal(0, 1)
    data['Height0'].append(height)

# Repeated measures: each later height adds a uniform increment in [0, 3).
# Height0 is a plain Python list, so convert it explicitly before the
# element-wise addition rather than relying on implicit list + ndarray coercion.
data['Height.after.3.years'] = np.asarray(data['Height0']) + np.random.uniform(0, 3, size=n_trees)
data['Height.after.6.years'] = data['Height.after.3.years'] + np.random.uniform(0, 3, size=n_trees)

df = pd.DataFrame(data)
# Write the table to data-eg.csv without the index column.
# NOTE(review): the destination is an absolute, machine-specific path, and the
# other cells visible in this notebook write to the very same file.
df.to_csv(
    path_or_buf="D:/大连理工大学/R语言书稿/Rforestat/R_code/chap4/data-eg.csv",
    index=False,
)


In [3]:
import pandas as pd
import numpy as np

# Simulate 16 trees (4 plots x 4 trees) with confounded DBH and plot effects,
# plus two follow-up heights whose growth rate differs between plots.

# Seed NumPy's legacy global generator so the simulated data are repeatable.
np.random.seed(6)

n_per_plot = 4  # trees sampled in each plot

data = {
    'plot': ['p1'] * n_per_plot + ['p2'] * n_per_plot +
            ['p3'] * n_per_plot + ['p4'] * n_per_plot,
}
# Derive the ids from the number of rows instead of hard-coding 16, so that
# changing n_per_plot cannot produce columns of unequal length.
data['sampleid'] = list(range(1, len(data['plot']) + 1))

# Per-plot DBH distribution (creates the confounding): one N(mean, 2) draw
# per tree, in row order.
plot_dbh_means = {'p1': 30, 'p2': 20, 'p3': 15, 'p4': 25}
data['DBH'] = []
for p in data['plot']:
    data['DBH'].append(np.random.normal(plot_dbh_means[p], 2))

# The actual additive plot effects on height.
plot_effects = {'p1': 3, 'p2': -2, 'p3': -3, 'p4': 2}

# Initial heights: 10 + 0.5 * DBH + plot effect + N(0, 1) noise.
data['Height0'] = []
for plot, tree_dbh in zip(data['plot'], data['DBH']):
    dbh_effect = 0.5 * tree_dbh
    plot_effect = plot_effects[plot]
    height = 10 + dbh_effect + plot_effect + np.random.normal(0, 1)
    data['Height0'].append(height)

# Yearly growth rate per plot; different rates per plot create a
# plot-by-time interaction.
growth_rates = {
    'p1': 0.33,  # plot 1: fastest growth
    'p2': 0.21,  # plot 2: moderate growth
    'p3': 0.15,  # plot 3: fairly slow growth (second-lowest rate)
    'p4': 0.1    # plot 4: slowest growth
}
years_per_interval = 3    # length of each measurement interval, in years
late_growth_factor = 0.9  # growth slows slightly in the second interval

# Height after 3 years: initial height + 3 years of growth + N(0, 0.5) noise.
data['Height.after.3.years'] = []
for plot, base_height in zip(data['plot'], data['Height0']):
    growth = growth_rates[plot] * years_per_interval
    height = base_height + growth + np.random.normal(0, 0.5)
    data['Height.after.3.years'].append(height)

# Height after 6 years: 3-year height + slightly reduced growth over the next
# 3 years + N(0, 0.5) noise.
data['Height.after.6.years'] = []
for plot, base_height in zip(data['plot'], data['Height.after.3.years']):
    growth = growth_rates[plot] * years_per_interval
    height = base_height + growth * late_growth_factor + np.random.normal(0, 0.5)
    data['Height.after.6.years'].append(height)

# Build the table and round every numeric column to one decimal place.
df = pd.DataFrame(data)
df = df.round(1)
# Write the rounded table to data-eg.csv without the index column.
# NOTE(review): the destination is an absolute, machine-specific path, and the
# other cells visible in this notebook write to the very same file.
df.to_csv(
    path_or_buf="D:/大连理工大学/R语言书稿/Rforestat/R_code/chap4/data-eg.csv",
    index=False,
)

# Print a header, then the mean height of each plot at the three measurement times.
print("\n各样地在不同时期的树高均值:")
height_columns = ['Height0', 'Height.after.3.years', 'Height.after.6.years']
summary = df.groupby('plot')[height_columns].mean()
print(summary)


各样地在不同时期的树高均值:
      Height0  Height.after.3.years  Height.after.6.years
plot                                                     
p1      28.30                29.500                30.375
p2      17.60                17.825                18.625
p3      16.10                16.675                17.375
p4      24.35                24.500                24.825
