Skip to content

Commit

Permalink
Working implementation of +F. Again, proteinfitter not guaranteed to …
Browse files Browse the repository at this point in the history
…work, but rest is good
  • Loading branch information
sjspielman committed Sep 28, 2017
1 parent e88c9c6 commit b2f7042
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 37 deletions.
26 changes: 11 additions & 15 deletions res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit.bf
Expand Up @@ -97,37 +97,33 @@ else {

}



protein_gtr.baseline_Rij = plusF_helper.Rij_options[protein_gtr.baseline_model]; // Defined in helper
if (protein_gtr.use_rate_variation == "Yes"){
protein_gtr.final_baseline_model = "protein_gtr.plusF.ModelDescription.withGamma";
protein_gtr.baseline_model_name = protein_gtr.baseline_model + "+F, with Gamma rates";
protein_gtr.baseline_model_desc = "protein_gtr.Baseline.ModelDescription.withGamma";
protein_gtr.rev_model_branch_lengths = "protein_gtr.REV.ModelDescription.withGamma";
} else{
protein_gtr.final_baseline_model = "protein_gtr.plusF.ModelDescription";
protein_gtr.baseline_model_name = protein_gtr.baseline_model + "+F";
protein_gtr.baseline_model_desc = "protein_gtr.Baseline.ModelDescription";
protein_gtr.rev_model_branch_lengths = "protein_gtr.REV.ModelDescription";
}
/********************************************************************************************************************/


protein_gtr.queue = mpi.CreateQueue ({ "Headers" : utility.GetListOfLoadedModules ("libv3/") ,
"Functions" :
protein_gtr.queue = mpi.CreateQueue ({ terms.mpi.Headers : utility.GetListOfLoadedModules ("libv3/") ,
terms.mpi.Functions :
{
{"protein_gtr.REV.ModelDescription",
"protein_gtr.REV.ModelDescription.withGamma",
"protein_gtr.REV.ModelDescription.freqs",
"protein_gtr.plusF.ModelDescription",
"protein_gtr.plusF._GenerateRate",
"protein_gtr.plusF.frequencies"
"protein_gtr.Baseline.ModelDescription.withGamma",
"protein_gtr.Baseline.ModelDescription",
}
},

"Variables" : {{
terms.mpi.Variables : {{
"protein_gtr.shared_EFV",
"protein_gtr.final_baseline_model",
"protein_gtr.baseline_model_desc",
"protein_gtr.rev_model_branch_lengths",
"protein_gtr.baseline_Rij"

}}
});

Expand All @@ -140,7 +136,7 @@ protein_gtr.phase_key = protein_gtr.baseline_phase;


/*************************** STEP ONE ***************************
Perform an initial fit of Baseline model+4G to the data (or load cached fit.)
Perform an initial fit of Baseline model+F(+/-4G) to the data (or load cached fit.)
*****************************************************************/
console.log("\n\n[PHASE 1] Performing initial branch length optimization using " + protein_gtr.baseline_model);

Expand Down
51 changes: 33 additions & 18 deletions res/TemplateBatchFiles/ProteinAnalyses/ProteinGTRFit_helper.ibf
Expand Up @@ -8,9 +8,7 @@ function protein_gtr.load_cached_options() {
protein_gtr.tolerance = (protein_gtr.analysis_results[utility.getGlobalValue("terms.json.options")])[utility.getGlobalValue("protein_gtr.options.tolerance")];
protein_gtr.baseline_model = (protein_gtr.analysis_results[utility.getGlobalValue("terms.json.options")])[utility.getGlobalValue("protein_gtr.options.baseline_model")];
protein_gtr.use_rate_variation = (protein_gtr.analysis_results[utility.getGlobalValue("terms.json.options")])[utility.getGlobalValue("protein_gtr.options.rate_variation")];





/*** Cache options can in theory be saved as numbers if options piped in, although not typically. Remap to strings. ****/
if (Type(protein_gtr.convergence_type) == "Number"){
Expand All @@ -22,7 +20,6 @@ function protein_gtr.load_cached_options() {
if (Type(protein_gtr.protein_gtr.use_rate_variation) == "Number"){
protein_gtr.use_rate_variation = utility.SwapKeysAndValues(utility.MatrixToDict(protein_gtr.rate_variation_options))[protein_gtr.use_rate_variation*2];
}
return 0;
}

function protein_gtr.save_options() {
Expand All @@ -35,18 +32,43 @@ function protein_gtr.save_options() {
"number of datasets": protein_gtr.file_list_count};

io.WriteCacheToFile (^"protein_gtr.cache_file", ^"protein_gtr.analysis_results");
return 0;
}








/* Model definitions, in particular for models with rate variation */
//------------------------------------------------------------------------------------------------------------------------

/**
* @name models.protein.Baseline.ModelDescription.withGamma
* @description Define baseline (standard matrix) model w/ +F and *no* four-category gamma rate variation
*/
function protein_gtr.Baseline.ModelDescription(type){
def = Call( models.protein.empirical.plusF_generators[protein_gtr.baseline_model], type);
return def;
}
/**
* @name models.protein.Baseline.ModelDescription.withGamma
* @description Define baseline (standard matrix) model w/ +F and *yes* four-category gamma rate variation
*/
function protein_gtr.Baseline.ModelDescription.withGamma(type){
def = protein_gtr.Baseline.ModelDescription(type);
def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.Gamma.factory ({utility.getGlobalValue("terms.rate_variation.bins") : 4});
return def;
}


/**
* @name models.protein.REV.ModelDescription.withGamma
* @description Define REV model with four-category gamma rate variation
*/
lfunction models.protein.REV.ModelDescription.withGamma (options) {
def = models.protein.REV.ModelDescription (options);
lfunction models.protein.REV.ModelDescription.withGamma (type) {
def = models.protein.REV.ModelDescription (type);
def [utility.getGlobalValue("terms.model.rate_variation")] = rate_variation.types.Gamma.factory ({utility.getGlobalValue("terms.rate_variation.bins") : 4});
return def;
};
Expand Down Expand Up @@ -138,21 +160,14 @@ function protein_gtr.fitBaselineToFile (filename) {
protein_gtr.full_trees = utility.Map (protein_gtr.partitions_and_trees, "_value_", '_value_[terms.data.tree]');
protein_gtr.full_data_filter = utility.Map (protein_gtr.filter_specification, "_value_", "_value_[terms.data.name]");



/******************** NORMALIZE MATRIX WITH DATA FREQUENCIES HERE *************************/
utility.ToggleEnvVariable("COUNT_GAPS_IN_FREQUENCIES", 0);
protein_gtr.empirical_frequencies = frequencies._aux.empirical.collect_data(protein_gtr.full_data_filter, 1, 1, 1);
utility.ToggleEnvVariable("COUNT_GAPS_IN_FREQUENCIES", None);

protein_gtr.normalized_qij = plusF_helper.BuildCustomNormalizedQ(protein_gtr.empirical_frequencies, protein_gtr.baseline_Rij, models.protein.alphabet);


protein_gtr.baseline_mle = estimators.FitSingleModel_Ext(protein_gtr.full_data_filter,
protein_gtr.full_trees,
protein_gtr.final_baseline_model,
protein_gtr.baseline_model_desc,
None,
None);

//

protein_gtr.baseline_mle - terms.global; // delete empty key
return protein_gtr.baseline_mle;
}
Expand Down
Expand Up @@ -240,7 +240,6 @@ io.ReportProgressMessageMD ("relative_prot_rates", "Stats", "* [95% Range] " +



tree_definition = utility.Map (relative_prot_rates.partitions_and_trees, "_partition_", '_partition_[terms.data.tree]');
io.SpoolJSON ({ terms.json.input : {terms.json.file: relative_prot_rates.alignment_info[terms.data.file],
terms.json.sequences: relative_prot_rates.alignment_info[terms.data.sequences],
terms.json.sites: relative_prot_rates.alignment_info[terms.data.sites],
Expand Down Expand Up @@ -319,6 +318,8 @@ lfunction relative_prot_rates.store_results (node, result, arguments) {
"
);



return rate_statistics;

}
Expand Down
13 changes: 10 additions & 3 deletions res/TemplateBatchFiles/libv3/models/model_functions.bf
Expand Up @@ -207,11 +207,18 @@ lfunction model.generic.get_rate_variation (model_spec) {
*/
function model.generic.DefineModel (model_spec, id, arguments, data_filter, estimator_type) {


// Basic model definition
model.generic.DefineModel.model = utility.CallFunction (model_spec, arguments);


// Add data filter information to model description
models.generic.AttachFilter (model.generic.DefineModel.model, data_filter);



// Set Q field
model.generic.DefineModel.model = Call (model.generic.DefineModel.model [terms.model.defineQ], model.generic.DefineModel.model, id);


// Define type of frequency estimator
Expand All @@ -225,8 +232,6 @@ function model.generic.DefineModel (model_spec, id, arguments, data_filter, esti
id,
data_filter);

// Set Q field
model.generic.DefineModel.model = Call (model.generic.DefineModel.model [terms.model.defineQ], model.generic.DefineModel.model, id);


// Define id's for frequencies, Q, and id
Expand Down Expand Up @@ -267,6 +272,7 @@ function model.generic.DefineMixtureModel (model_spec, id, arguments, data_filte
// for mixture models this will define the mixture components as well
model.generic.DefineModel.model = Call (model.generic.DefineModel.model [terms.model.defineQ], model.generic.DefineModel.model, id);


if (estimator_type != None) {
model.generic.DefineModel.model [terms.model.frequency_estimator] = estimator_type;
}
Expand Down Expand Up @@ -398,7 +404,8 @@ function models.generic.SetBranchLength (model, value, parameter) {
* @returns 0
*/
lfunction models.generic.AttachFilter (model, filter) {



if (Type (filter) != "String") {
utility.ForEach (filter, "_filter_", "models.generic.AttachFilter (`&model`, _filter_)");
model[utility.getGlobalValue("terms.model.data")] = filter;
Expand Down
3 changes: 3 additions & 0 deletions res/TemplateBatchFiles/libv3/models/protein/empirical.bf
Expand Up @@ -78,6 +78,9 @@ lfunction models.protein.empirical._GenerateRate(rateDict, fromChar, toChar, nam
*/
lfunction models.protein.empirical._DefineQ(model_dict, namespace) {

// Call frequencies here. Will be repeated in model.generic.DefineModel, but we are ok with that.
frequencies._aux.empirical.singlechar(model_dict, namespace, model_dict[utility.getGlobalValue("terms.model.data")]);

models.protein.empirical._NormalizeEmpiricalRates(model_dict, namespace);
models.protein.empirical.DefineQMatrix (model_dict, namespace);
return model_dict;
Expand Down

0 comments on commit b2f7042

Please sign in to comment.