/
genome_compatibility.go
191 lines (171 loc) · 6.91 KB
/
genome_compatibility.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
package genetics
import (
"github.com/yaricom/goNEAT/v2/neat"
"math"
)
/* ******** COMPATIBILITY CHECKING METHODS * ********/
// This function gives a measure of compatibility between two Genomes by computing a linear combination of three
// characterizing variables of their compatibility. The three variables represent PERCENT DISJOINT GENES,
// PERCENT EXCESS GENES, MUTATIONAL DIFFERENCE WITHIN MATCHING GENES. So the formula for compatibility
// is: disjoint_coeff * pdg + excess_coeff * peg + mutdiff_coeff * mdmg
// The three coefficients are global system parameters.
// The bigger returned value the less compatible the genomes.
//
// Fully compatible genomes has 0.0 returned.
func (g *Genome) compatibility(og *Genome, opts *neat.Options) float64 {
if opts.GenCompatMethod == neat.GenomeCompatibilityMethodLinear {
return g.compatLinear(og, opts)
} else {
return g.compatFast(og, opts)
}
}
// The compatibility checking method with linear performance depending on the size of the lognest genome in comparison.
// When genomes are small this method is compatible in performance with Genome#compatFast method.
// The compatibility formula remains the same: disjoint_coeff * pdg + excess_coeff * peg + mutdiff_coeff * mdmg
// where: pdg - PERCENT DISJOINT GENES, peg - PERCENT EXCESS GENES, and mdmg - MUTATIONAL DIFFERENCE WITHIN MATCHING GENES
//
// Fully compatible genomes has 0.0 returned.
func (g *Genome) compatLinear(og *Genome, opts *neat.Options) float64 {
numDisjoint, numExcess, mutDiffTotal, numMatching := 0.0, 0.0, 0.0, 0.0
size1, size2 := len(g.Genes), len(og.Genes)
maxGenomeSize := size2
if size1 > size2 {
maxGenomeSize = size1
}
var gene1, gene2 *Gene
for i, i1, i2 := 0, 0, 0; i < maxGenomeSize; i++ {
if i1 >= size1 {
numExcess += 1.0
i2++
} else if i2 >= size2 {
numExcess += 1.0
i1++
} else {
gene1 = g.Genes[i1]
gene2 = og.Genes[i2]
p1innov := gene1.InnovationNum
p2innov := gene2.InnovationNum
if p1innov == p2innov {
numMatching += 1.0
mutDiff := math.Abs(gene1.MutationNum - gene2.MutationNum)
mutDiffTotal += mutDiff
i1++
i2++
} else if p1innov < p2innov {
i1++
numDisjoint += 1.0
} else if p2innov < p1innov {
i2++
numDisjoint += 1.0
}
}
}
//fmt.Printf("num_disjoint: %.f num_excess: %.f mut_diff_total: %.f num_matching: %.f\n", num_disjoint, num_excess, mut_diff_total, num_matching)
// Return the compatibility number using compatibility formula
// Note that mut_diff_total/num_matching gives the AVERAGE difference between mutation_nums for any two matching
// Genes in the Genome. Look at disjointedness and excess in the absolute (ignoring size)
comp := opts.DisjointCoeff*numDisjoint + opts.ExcessCoeff*numExcess +
opts.MutdiffCoeff*(mutDiffTotal/numMatching)
return comp
}
// The faster version of genome compatibility checking. The compatibility check will start from the end of genome where
// the most of disparities are located - the novel genes with greater innovation ID are always attached at the end (see geneInsert).
// This has the result of complicating the routine because we must now invoke additional logic to determine which genes
// are excess and when the first disjoint gene is found. This is done with an extra integer:
// * excessGenesSwitch=0 // indicates to the loop that it is handling the first gene.
// * excessGenesSwitch=1 // Indicates that the first gene was excess and on genome 1.
// * excessGenesSwitch=2 // Indicates that the first gene was excess and on genome 2.
// * excessGenesSwitch=3 // Indicates that there are no more excess genes.
//
// The compatibility formula remains the same: disjoint_coeff * pdg + excess_coeff * peg + mutdiff_coeff * mdmg
// where: pdg - PERCENT DISJOINT GENES, peg - PERCENT EXCESS GENES, and mdmg - MUTATIONAL DIFFERENCE WITHIN MATCHING GENES
//
// Fully compatible genomes has 0.0 returned.
func (g *Genome) compatFast(og *Genome, opts *neat.Options) float64 {
list1Count, list2Count := len(g.Genes), len(og.Genes)
// First test edge cases
if list1Count == 0 && list2Count == 0 {
// Both lists are empty! No disparities, therefore the genomes are compatible!
return 0.0
}
if list1Count == 0 {
// All list2 genes are excess.
return float64(list2Count) * opts.ExcessCoeff
}
if list2Count == 0 {
// All list1 genes are excess.
return float64(list1Count) * opts.ExcessCoeff
}
excessGenesSwitch, numMatching := 0, 0
compatibility, mutDiff := 0.0, 0.0
list1Idx, list2Idx := list1Count-1, list2Count-1
gene1, gene2 := g.Genes[list1Idx], og.Genes[list2Idx]
for {
if gene2.InnovationNum > gene1.InnovationNum {
// Most common test case(s) at top for efficiency.
if excessGenesSwitch == 3 {
// No more excess genes. Therefore this mismatch is disjoint.
compatibility += opts.DisjointCoeff
} else if excessGenesSwitch == 2 {
// Another excess gene on genome 2.
compatibility += opts.ExcessCoeff
} else if excessGenesSwitch == 1 {
// We have found the first non-excess gene.
excessGenesSwitch = 3
compatibility += opts.DisjointCoeff
} else {
// First gene is excess, and is on genome 2.
excessGenesSwitch = 2
compatibility += opts.ExcessCoeff
}
// Move to the next gene in list2.
list2Idx--
} else if gene1.InnovationNum == gene2.InnovationNum {
// No more excess genes. It's quicker to set this every time than to test if is not yet 3.
excessGenesSwitch = 3
// Matching genes. Increase compatibility by MutationNum difference * coeff.
mutDiff += math.Abs(gene1.MutationNum - gene2.MutationNum)
numMatching++
// Move to the next gene in both lists.
list1Idx--
list2Idx--
} else {
// Most common test case(s) at top for efficiency.
if excessGenesSwitch == 3 {
// No more excess genes. Therefore this mismatch is disjoint.
compatibility += opts.DisjointCoeff
} else if excessGenesSwitch == 1 {
// Another excess gene on genome 1.
compatibility += opts.ExcessCoeff
} else if excessGenesSwitch == 2 {
// We have found the first non-excess gene.
excessGenesSwitch = 3
compatibility += opts.DisjointCoeff
} else {
// First gene is excess, and is on genome 1.
excessGenesSwitch = 1
compatibility += opts.ExcessCoeff
}
// Move to the next gene in list1.
list1Idx--
}
// Check if we have reached the end of one (or both) of the lists. If we have reached the end of both then
// we execute the first 'if' block - but it doesn't matter since the loop is not entered if both lists have
// been exhausted.
if list1Idx < 0 {
// All remaining list2 genes are disjoint.
compatibility += float64(list2Idx+1) * opts.DisjointCoeff
break
}
if list2Idx < 0 {
// All remaining list1 genes are disjoint.
compatibility += float64(list1Idx+1) * opts.DisjointCoeff
break
}
gene1, gene2 = g.Genes[list1Idx], og.Genes[list2Idx]
}
if numMatching > 0 {
compatibility += mutDiff * opts.MutdiffCoeff / float64(numMatching)
}
return compatibility
}