This repository has been archived by the owner on Jul 7, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 7
/
sad-comparisons.py
187 lines (146 loc) · 8.38 KB
/
sad-comparisons.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
""" Project code for comparing species abundance distribution (SAD) models
To run this code:
python sad-comparisons.py
To use a non-standard data directory:
python sad-comparisons.py /path/to/data_dir
To run data sets other than the default publicly available data add a file to
the data directory (`./sad-data` by default) named `dataset_config.txt` that
contains a list of dataset names, one on each line.
This code depends on the most recent version of the macroecotools Python
module, which can be installed directly from github using pip:
pip install git+https://github.com/weecology/macroecotools.git
"""
from __future__ import division
import csv
import numpy as np
import os
import sys
from math import log, exp
from pandas import DataFrame
import macroecotools
import macroeco_distributions as md
def import_abundance(datafile):
"""Imports raw species abundance .csv files in the form: Site, Year, Species, Abundance."""
raw_data = np.genfromtxt(datafile, dtype = "S15,i8,S50,i8", names = ['site','year','sp','ab'], delimiter = ",",comments = "#")
return raw_data
def model_comparisons(raw_data, dataset_name, data_dir, cutoff = 9):
""" Uses raw species abundance data to compare predicted vs. empirical species abundance distributions (SAD) and output results in csv files.
Keyword arguments:
raw_data: numpy structured array with 4 columns: 'site', 'year', 'sp' (species), 'ab' (abundance).
dataset_name: short code to indicate the name of the dataset in the output file names.
data_dir: directory in which to store results output.
cutoff: minimum number of species required to run -1.
SAD models and packages used:
Logseries (macroecotools/macroecodistributions)
Poisson lognormal (macroecotools/macroecodistributions)
Negative binomial (macroecotools/macroecodistributions)
Zipf (macroecotools/macroecodistributions)
Neutral theory: Neutral theory predicts the negative binomial distribution (Connolly et al. 2014. Commonness and rarity in the marine biosphere. PNAS 111: 8524-8529. http://www.pnas.org/content/111/23/8524.abstract
"""
usites = np.sort(np.unique(raw_data["site"]))
# Open output files
f1 = open(data_dir + dataset_name + '_dist_test.csv','wb')
output1 = csv.writer(f1)
f2 = open(data_dir + dataset_name + '_likelihoods.csv','wb')
output2 = csv.writer(f2)
f3 = open(data_dir + dataset_name + '_relative_L.csv','wb')
output3 = csv.writer(f3)
# Insert header
output1.writerow(['site', 'S', 'N', 'AICc_logseries', 'AICc_pln', 'AICc_negbin', 'AICc_zipf'])
output2.writerow(['site', 'S', 'N', 'likelihood_logseries', 'likelihood_pln', 'likelihood_negbin', 'likelihood_zipf'])
output3.writerow(['site', 'S', 'N', 'relative_ll_logseries', 'relative_ll_pln', 'relative_ll_negbin', 'relative_ll_zipf'])
results = []
for site in usites:
subsites = raw_data["site"][raw_data["site"] == site]
subabundance = raw_data["ab"][raw_data["site"] == site]
N = sum(subabundance) # N = total abundance for a site
S = len(subsites) # S = species richness at a site
if (min(subabundance) > 0) and (S > cutoff):
print("%s, Site %s, S=%s, N=%s" % (dataset_name, site, S, N))
# Calculate Akaike weight of species abundance models:
# Parameter k is the number of fitted parameters
k1 = 1
k2 = 2
# Calculate log-likelihoods of species abundance models and calculate AICc values:
# Logseries
p_untruncated = md.logser_solver(subabundance)
L_logser_untruncated = md.logser_ll(subabundance, p_untruncated) # Log-likelihood of untruncated logseries
AICc_logser_untruncated = macroecotools.AICc(k1, L_logser_untruncated, S) # AICc logseries untruncated
relative_ll_logser_untruncated = AICc_logser_untruncated# Relative likelihood untruncated logseries
#Start making AICc list
AICc_list = [AICc_logser_untruncated]
likelihood_list = [L_logser_untruncated]
relative_likelihood_list = [relative_ll_logser_untruncated]
# Poisson lognormal
mu, sigma = md.pln_solver(subabundance)
L_pln = md.pln_ll(subabundance, mu,sigma) # Log-likelihood of Poisson lognormal
AICc_pln = macroecotools.AICc(k2, L_pln, S) # AICc Poisson lognormal
relative_ll_pln = macroecotools.AICc(k1, L_pln, S) #Relative likelihood, Poisson lognormal
# Add to AICc list
AICc_list = AICc_list + [AICc_pln]
likelihood_list = likelihood_list + [L_pln]
relative_likelihood_list = relative_likelihood_list + [relative_ll_pln]
# Negative binomial
n0, p0 = md.nbinom_lower_trunc_solver(subabundance)
L_negbin = md.nbinom_lower_trunc_ll(subabundance, n0, p0) # Log-likelihood of negative binomial
AICc_negbin = macroecotools.AICc(k2, L_negbin, S)# AICc negative binomial
relative_ll_negbin = macroecotools.AICc(k1, L_negbin, S) # Relative log-likelihood of negative binomial
# Add to AICc list
AICc_list = AICc_list + [AICc_negbin]
likelihood_list = likelihood_list + [L_negbin]
relative_likelihood_list = relative_likelihood_list + [relative_ll_negbin]
# Zipf distribution
par = md.zipf_solver(subabundance)
L_zipf = md.zipf_ll(subabundance, par) #Log-likelihood of Zipf distribution
AICc_zipf = macroecotools.AICc(k1, L_zipf, S)
relative_ll_zipf = AICc_zipf
#Add to AICc list
AICc_list = AICc_list + [AICc_zipf]
likelihood_list = likelihood_list + [L_zipf]
relative_likelihood_list = relative_likelihood_list + [relative_ll_zipf]
# Calculate AICc weight
weight = macroecotools.aic_weight(AICc_list, S, cutoff = 4)
#Calculate relative likelihood
relative_likelihoods = macroecotools.aic_weight(relative_likelihood_list, S, cutoff = 4)
# Convert weight to list
weights_output = weight.tolist()
#Convert relative likelihoods to list
relative_likelihoods_output = relative_likelihoods.tolist()
# Format results for output
for weight in weights_output:
results1 = [[site, S, N] + weights_output]
results2 = [[site, S, N] + likelihood_list]
results3 = [[site, S, N] + relative_likelihoods_output]
results.append([site, S, N] + weights_output + likelihood_list + relative_likelihoods_output)
# Save results to a csv file:
output1.writerows(results1)
output2.writerows(results2)
output3.writerows(results3)
results = DataFrame(results, columns=['site', 'S', 'N', 'AICc_logseries', 'AICc_pln', 'AICc_negbin',
'AICc_zipf', 'likelihood_logseries', 'likelihood_pln',
'likelihood_negbin', 'likelihood_zipf', 'relative_ll_logseries',
'relative_ll_pln', 'relative_ll_negbin', 'relative_ll_zipf'])
results.to_csv(os.path.join(data_dir, dataset_name + '_likelihood_results.csv'), index=False)
f1.close()
f2.close()
f3.close()
if __name__ == '__main__':
# Set up analysis parameters
analysis_ext = '_spab.csv' # Extension for raw species abundance files
if len(sys.argv) > 1:
data_dir = sys.argv[1]
else:
data_dir = './sad-data/'
#Determine which datasets to use
if os.path.exists(data_dir + 'dataset_config.txt'):
dataset_config_file = open(data_dir + 'dataset_config.txt', 'r')
datasets = []
for line in dataset_config_file:
datasets.append(line.strip())
else:
datasets = ['bbs', 'fia', 'gentry', 'mcdb']
# Starts actual analyses for each dataset in turn.
for dataset in datasets:
datafile = data_dir + dataset + analysis_ext
raw_data = import_abundance(datafile) # Import data
model_comparisons(raw_data, dataset, data_dir, cutoff = 9) # Run analyses on data