Skip to content

Commit

Permalink
Merge pull request #644 from sjspielman/2.3.4
Browse files Browse the repository at this point in the history
Binary model, JC69 nucleotide model, and minor cleanups to relative_nucleotide_rates.bf
  • Loading branch information
Stephanie committed Sep 27, 2017
2 parents 0a3ecfa + e8c365e commit cd5adfd
Show file tree
Hide file tree
Showing 12 changed files with 456 additions and 243 deletions.
4 changes: 2 additions & 2 deletions res/TemplateBatchFiles/ProteinAnalyses/plusF_helper.ibf
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
plusF_helper.empirical_model_generators = {"LG": "models.protein.LG.ModelDescription",
"WAG": "models.protein.WAG.ModelDescription",
"JTT": "models.protein.JTT.ModelDescription",
"JC": "models.protein.JC.ModelDescription"};
"JC69": "models.protein.JC69.ModelDescription"};

plusF_helper.Rij_options = {"LG": models.protein.empirical.LG.empirical_R,
"WAG": models.protein.empirical.WAG.empirical_R,
"JTT": models.protein.empirical.JTT.empirical_R,
"JC": models.protein.empirical.JC.empirical_R};
"JC69": models.protein.empirical.JC69.empirical_R};


// This code is elegant and I will hear nothing to the contrary.
Expand Down
5 changes: 2 additions & 3 deletions res/TemplateBatchFiles/ProteinAnalyses/relative_prot_rates.bf
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,12 @@ relative_prot_rates.filter_specification = alignments.DefineFiltersForPartitions
/***************************************** SELECT MODEL, +F **********************************************************/
/** NOTE: there is no rate variation option here ever. This must be discouraged. **/

// "WAG", "LG", "JTT", "JC"
relative_prot_rates.model_name = io.SelectAnOption (models.protein.empirical_models,
"Select an empirical protein model for fitting rates (we recommend JC to avoid matrix-induced biases in inferred rates):");
"Select an empirical protein model for fitting rates (we recommend JC69 to avoid matrix-induced biases in inferred rates):");

// "Yes", "No"
relative_prot_rates.plusF = io.SelectAnOption ({{"Yes", "Use empirical (+F) amino-acid frequencies ."}, {"No", "Use default amino-acid frequencies."}},
"Use a +F model for initial bl optimization? (recommended no for a simple JC model)");
"Use a +F model for initial bl optimization? (recommended no for a simple JC69 model)");


lfunction relative_prot_rates.plusF.ModelDescription(type) {
Expand Down
8 changes: 6 additions & 2 deletions res/TemplateBatchFiles/libv3/all-terms.bf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

// These terms must be defined in this manner in order to avoid conflicts w/ built-in functions global and Gamma
terms.global = "global";
terms.json.global = "Global model fit";
terms.json.global = "Global model fit"; // TODO: Kill
terms.rate_variation.Gamma = "Gamma";


Expand Down Expand Up @@ -88,7 +88,10 @@ namespace terms{
return "Substitution rate from nucleotide " + fromC + " to nucleotide " + toC;
}
function aminoacidRate(fromA, toA) {
return "Substitution rate from aminoacid " + fromA + " to aminoacid " + toA;
return "Substitution rate from amino-acid " + fromA + " to amino-acid " + toA;
}
function binaryRate(fromX, toX) {
return "Substitution rate from character " + fromX + " to character " + toX;
}
function timeParameter() {
return "Evolutionary time parameter";
Expand Down Expand Up @@ -170,6 +173,7 @@ namespace terms{
CF3x4 = "Corrected 3x4 frequency estimator";
_20x1 = "Protein 20x1 estimator";
predefined = "Based on a training set";
binary = "Binary character frequency estimator";
}

/* Terms accompanying tasks/genetic_code.bf */
Expand Down
21 changes: 0 additions & 21 deletions res/TemplateBatchFiles/libv3/function-loader.bf

This file was deleted.

4 changes: 3 additions & 1 deletion res/TemplateBatchFiles/libv3/models/DNA.bf
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ LoadFunctionLibrary ("libv3/all-terms.bf");
/** @module models.DNA */

models.DNA.models = {{"GTR", "General time reversible model"},
{"HKY85", "Hasegawa Kishino Yano 85 (HKY85) model"}};
{"HKY85", "Hasegawa Kishino Yano 85 (HKY85) model"},
{"JC69", "Jukes-Cantor 69 (JC69) model"}};

models.DNA.alphabet = {{"A","C","G","T"}};
models.DNA.dimensions = 4;

/**
* @name models.DNA.generic.DefineQMatrix
Expand Down
71 changes: 71 additions & 0 deletions res/TemplateBatchFiles/libv3/models/DNA/JC69.bf
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
LoadFunctionLibrary("../DNA.bf");
LoadFunctionLibrary("../parameters.bf");
LoadFunctionLibrary("../frequencies.bf");
LoadFunctionLibrary("../../UtilityFunctions.bf");
LoadFunctionLibrary ("libv3/all-terms.bf");

/** @module models.DNA.JC69 */

/**
* @name models.DNA.JC69.ModelDescription
* @param {String} type
*/
lfunction models.DNA.JC69.ModelDescription(type) {

return {
utility.getGlobalValue("terms.alphabet"): utility.getGlobalValue("models.DNA.alphabet"),
utility.getGlobalValue("terms.description"): "The JC69 equal-rates model of nucleotide substitution",
utility.getGlobalValue("terms.model.canonical"): 1, // is of the r_ij \times \pi_j form
utility.getGlobalValue("terms.model.reversible"): 1,
utility.getGlobalValue("terms.model.efv_estimate_name"): utility.getGlobalValue("terms.frequencies.equal"),
utility.getGlobalValue("terms.parameters"): {
utility.getGlobalValue("terms.global"): {},
utility.getGlobalValue("terms.local"): {},
utility.getGlobalValue("terms.model.empirical"): 0
},
utility.getGlobalValue("terms.model.type"): type,
utility.getGlobalValue("terms.model.get_branch_length"): "",
utility.getGlobalValue("terms.model.set_branch_length"): "models.generic.SetBranchLength",
utility.getGlobalValue("terms.model.constrain_branch_length"): "models.generic.constrain_branch_length",
utility.getGlobalValue("terms.model.frequency_estimator"): "frequencies.equal",
utility.getGlobalValue("terms.model.q_ij"): "models.DNA.JC69._generateRate",
utility.getGlobalValue("terms.model.time"): "models.DNA.generic.Time",
utility.getGlobalValue("terms.model.defineQ"): "models.DNA.JC69._defineQ",
utility.getGlobalValue("terms.model.post_definition"): "models.generic.post.definition"
};
}

/**
* @name models.DNA.JC69._generateRate
* @param {Number} fromChar
* @param {Number} toChar
* @param {String} namespace
* @param {String} model_type
* @returns models.DNA.JC69._generateRate.p
*/
function models.DNA.JC69._generateRate(fromChar, toChar, namespace, model_type) {
models.DNA.JC69._generateRate.p = {};
models.DNA.JC69._generateRate.p[model_type] = {};

models.DNA.JC69.parameter_name = "theta";

if (model_type == terms.global) {
models.DNA.JC69.parameter_name = parameters.ApplyNameSpace(models.DNA.JC69.parameter_name, namespace);
}

(models.DNA.JC69._generateRate.p[model_type])[terms.nucleotideRate(fromChar, toChar)] = models.DNA.JC69.parameter_name;
models.DNA.JC69._generateRate.p[terms.model.rate_entry] = models.DNA.JC69.parameter_name;

return models.DNA.JC69._generateRate.p;
}

/**
* @name models.DNA.JC69._defineQ
* @param {Dictionary} jc69
* @param {String} namespace
*/
function models.DNA.JC69._defineQ(jc69, namespace) {

models.DNA.generic.DefineQMatrix(jc69, namespace);
return jc69;
}
109 changes: 109 additions & 0 deletions res/TemplateBatchFiles/libv3/models/binary.bf
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
LoadFunctionLibrary ("../all-terms.bf");
LoadFunctionLibrary ("frequencies.bf");

/** @module models.binary */



/* Alphabet */
models.binary.alphabet = {{"0","1"}};
models.binary.dimensions = 2;


/**
* @name models.binary.generic.DefineQMatrix
* @param {Dictionary} modelSpec
* @param {String} namespace
*/
function models.binary.generic.DefineQMatrix (modelSpec, namespace) {

__alphabet = modelSpec [terms.alphabet];
assert (Type (__alphabet) == "Matrix" && Columns (__alphabet) == models.binary.dimensions, "Unsupported or missing alphabet '" + __alphabet + "'");

__modelType = modelSpec[terms.model.type];
if (Type (__modelType) == "None" || Type (__modelType) == "Number") {
__modelType = terms.global;
}
modelSpec[terms.model.type] = __modelType;
assert (__modelType == terms.local || __modelType == terms.global, "Unsupported or missing model type '" + __modelType + "'");

__rate_function = modelSpec [terms.model.q_ij];
assert (utility.IsFunction (__rate_function), "Missing q_ij callback in model specification");

__time_function = modelSpec [terms.model.time];
assert (utility.IsFunction (__time_function), "Missing time callback in model specification");


__rate_matrix = {models.binary.dimensions, models.binary.dimensions};
__rate_matrix [0][0] = "";

__rate_variation = model.generic.get_rate_variation (modelSpec);



__global_cache = {};

if (None != __rate_variation) {


__rp = Call (__rate_variation[terms.rate_variation.distribution], __rate_variation[terms.rate_variation.options], namespace);
__rate_variation [terms.id] = (__rp[terms.category])[terms.id];

parameters.DeclareCategory (__rp[terms.category]);
parameters.helper.copy_definitions (modelSpec[terms.parameters], __rp);
}

for (_rowChar = 0; _rowChar < models.binary.dimensions; _rowChar +=1 ){
__rate_matrix [_rowChar][_rowChar] = "";
for (_colChar = _rowChar + 1; _colChar < models.binary.dimensions; _colChar += 1) {

__rp = Call (__rate_function, __alphabet[_rowChar],
__alphabet[_colChar],
namespace,
__modelType);


if (None != __rate_variation) {
__rp = Call (__rate_variation[terms.rate_variation.rate_modifier],
__rp,
__alphabet[_rowChar],
__alphabet[_colChar],
namespace,
__rate_variation [terms.id]);
}

if (Abs (__rp[terms.model.rate_entry])) {
parameters.DeclareGlobal (__rp[terms.global], __global_cache);
parameters.helper.copy_definitions (modelSpec[terms.parameters], __rp);

__rate_matrix [_rowChar][_colChar] = __rp[terms.model.rate_entry];
__rate_matrix [_colChar][_rowChar] = __rp[terms.model.rate_entry];
continue;
}
__rate_matrix [_rowChar][_colChar] = "";
}
}

__rp = Call (__time_function, __modelType);


if (Abs (__rp)) {
((modelSpec[terms.parameters])[terms.local])[terms.timeParameter ()] = __rp;
modelSpec [terms.model.rate_matrix] = parameters.AddMultiplicativeTerm (__rate_matrix, __rp, 0);
} else {
modelSpec [terms.model.rate_matrix] = __rate_matrix;
}

}

/**
* @name models.binary.generic.Time
* @param option - does nothing
* @returns default time
*/
function models.binary.generic.Time (option) {
return terms.parameters.default_time;
}



76 changes: 76 additions & 0 deletions res/TemplateBatchFiles/libv3/models/binary/charbinary.bf
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
LoadFunctionLibrary("../binary.bf");
LoadFunctionLibrary("../parameters.bf");
LoadFunctionLibrary("../frequencies.bf");
LoadFunctionLibrary("../../UtilityFunctions.bf");

/** @module models.protein.REV */

/**
* @name models.protein.REV.ModelDescription
* @param {String} type
*/

lfunction models.binary.ModelDescription(type) {
return {
utility.getGlobalValue("terms.alphabet"): utility.getGlobalValue("models.binary.alphabet"),
utility.getGlobalValue("terms.description"): "General time reversible model for binary characters",
utility.getGlobalValue("terms.model.canonical"): 1, // is of the r_ij \times \pi_j form
utility.getGlobalValue("terms.model.reversible"): 1,
utility.getGlobalValue("terms.model.efv_estimate_name"): utility.getGlobalValue("terms.frequencies.binary"),
utility.getGlobalValue("terms.parameters"): {
utility.getGlobalValue("terms.global"): {},
utility.getGlobalValue("terms.local"): {},
utility.getGlobalValue("terms.model.empirical"): 1
},
utility.getGlobalValue("terms.model.type"): type,
utility.getGlobalValue("terms.model.get_branch_length"): "",
utility.getGlobalValue("terms.model.set_branch_length"): "models.generic.SetBranchLength",
utility.getGlobalValue("terms.model.constrain_branch_length"): "models.generic.constrain_branch_length",
utility.getGlobalValue("terms.model.frequency_estimator"): "frequencies.empirical.binary",
utility.getGlobalValue("terms.model.q_ij"): "models.binary._GenerateRate",
utility.getGlobalValue("terms.model.time"): "models.binary.generic.Time",
utility.getGlobalValue("terms.model.defineQ"): "models.binary._DefineQ",
utility.getGlobalValue("terms.model.post_definition"): "models.generic.post.definition"
};
}

lfunction models.binary._GenerateRate(fromChar, toChar, namespace, model_type) {
models.binary._generateRate.p = {};
models.binary._generateRate.p[model_type] = {};

if (fromChar > toChar) {
models.binary.parameter_name = "mu_" + toChar + fromChar;
} else {
models.binary.parameter_name = "mu_" + fromChar + toChar;
}

if (model_type == utility.getGlobalValue("terms.global")) {
models.binary.parameter_name = parameters.ApplyNameSpace(models.binary.parameter_name, namespace);
}

(models.binary._generateRate.p[model_type])[terms.binaryRate(fromChar, toChar)] = models.binary.parameter_name;

models.binary._generateRate.p[utility.getGlobalValue("terms.model.rate_entry")] = models.binary.parameter_name;

return models.binary._generateRate.p;

}

/**
* @name models.protein.REV._DefineQ
* @param {Dictionary} model definition
* @param {String} namespace
*/
lfunction models.binary._DefineQ(model_dict, namespace) {
models.binary.generic.DefineQMatrix (model_dict, namespace);
/*
if (model_dict[utility.getGlobalValue("terms.model.type")] == utility.getGlobalValue("terms.global")) {
parameters.SetConstraint( ((model_dict[utility.getGlobalValue("terms.parameters")])[utility.getGlobalValue("terms.global")])[terms.binaryRate("0", "1")], "1", "" );
}
if (model_dict[utility.getGlobalValue("terms.model.type")] == utility.getGlobalValue("terms.local")) {
parameters.SetConstraint(((model_dict[utility.getGlobalValue("terms.parameters")])[utility.getGlobalValue("terms.local")])[terms.binaryRate("0", "1")], "1", "");
}
*/

return model_dict;
}
19 changes: 17 additions & 2 deletions res/TemplateBatchFiles/libv3/models/frequencies.bf
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,22 @@ LoadFunctionLibrary("parameters.bf");
LoadFunctionLibrary("../UtilityFunctions.bf");
LoadFunctionLibrary("model_functions.bf");


/** @module frequencies */

/**
* Sets model's equilibrium frequency estimator to Binary chatacter 2x1 estimator
* @name frequencies.empirical.binary
* @param {Dictionary} model
* @param {String} namespace - does nothing
* @param {DataSetFilter} datafilter - does nothing
* @returns {Dictionary} updated model
*/
function frequencies.empirical.binary(model, namespace, datafilter) {
model = frequencies._aux.empirical.singlechar(model, namespace, datafilter);
model[terms.model.efv_estimate_name] = terms.frequencies.binary;
return model;
}

/**
* Sets model's equilibrium frequency estimator to equal frequencies
* @name frequencies.equal
Expand All @@ -15,7 +28,9 @@ LoadFunctionLibrary("model_functions.bf");
* @returns {Dictionary} updated model
*/
function frequencies.equal(model, namespace, datafilter) {
__N = Abs(model[terms.alphabet]);


__N = utility.Array1D(model[terms.alphabet]);
model[terms.efv_estimate] = {
__N,
1
Expand Down
Loading

0 comments on commit cd5adfd

Please sign in to comment.