-
Notifications
You must be signed in to change notification settings - Fork 6
/
perfectAlign.go
87 lines (68 loc) · 3.02 KB
/
perfectAlign.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
package dnaThreeBit
import (
"log"
"math/bits"
)
// countRightMatches counts the number of perfectly matching bases between seqOne and seqTwo, starting
// a position startOne in seqOne and startTwo in seqTwo. The slices must be encoded with the threeBit format
// and in register with each other. The first comparison is startOne vs startTwo, and then to increasing
// positions.
func countRightMatches(seqOne []uint64, startOne int, lenOne int, seqTwo []uint64, startTwo int, lenTwo int) int {
const bitsPerBase int = 3
const bitsPerInt int = 64
const basesPerInt int = bitsPerInt / bitsPerBase
const ones uint64 = 0xFFFFFFFFFFFFFFFF
var i, j int
var seqDiff uint64 = 0
var bitMatches, matches int = 0, 0
var offsetOne int = (startOne % basesPerInt) * bitsPerBase
var offsetTwo int = (startTwo % basesPerInt) * bitsPerBase
if offsetOne != offsetTwo {
log.Fatalf("Different offsets when comparing sequences\n")
}
i = startOne / basesPerInt
j = startTwo / basesPerInt
iEnd := (lenOne + basesPerInt - 1) / basesPerInt
jEnd := (lenTwo + basesPerInt - 1) / basesPerInt
seqDiff = seqOne[i] ^ seqTwo[j]
seqDiff = seqDiff & (ones >> uint(offsetOne))
bitMatches = bits.LeadingZeros64(seqDiff)
matches = (bitMatches - offsetOne) / bitsPerBase
for i, j = i+1, j+1; i < iEnd && j < jEnd && bitMatches == bitsPerInt; i, j = i+1, j+1 {
seqDiff = seqOne[i] ^ seqTwo[j]
bitMatches = bits.LeadingZeros64(seqDiff)
matches += bitMatches / bitsPerBase
}
// TODO: I am not sure what to do here. I am currently assuming that the "padding" on the end of the two sequences is different so that it will not match return numbers.Min(numbers.Min(matches, lenOne-startOne), lenTwo-startTwo)
return matches
}
// countLeftMatches counts the number of perfectly matching bases between seqOne and seqTwo, starting
// a position startOne in seqOne and startTwo in seqTwo. The slices must be encoded with the threeBit format
// and in register with each other. The first comparison is startOne vs startTwo, and then to decreasing
// positions.
func countLeftMatches(seqOne []uint64, startOne int, seqTwo []uint64, startTwo int) int {
const bitsPerBase int = 3
const bitsPerInt int = 64
const basesPerInt int = bitsPerInt / bitsPerBase
const ones uint64 = 0xFFFFFFFFFFFFFFFF
var i, j int
var seqDiff uint64 = 0
var bitMatches, matches int = 0, 0
var offsetOne int = (startOne % basesPerInt) * bitsPerBase
var offsetTwo int = (startTwo % basesPerInt) * bitsPerBase
if offsetOne != offsetTwo {
log.Fatalf("Different offsets when comparing sequences\n")
}
i = startOne / basesPerInt
j = startTwo / basesPerInt
seqDiff = seqOne[i] ^ seqTwo[j]
seqDiff = seqDiff & (ones << uint(bitsPerInt-offsetOne-bitsPerBase))
bitMatches = bits.TrailingZeros64(seqDiff)
matches = (bitMatches - (bitsPerInt - offsetOne - bitsPerBase)) / bitsPerBase
for i, j = i-1, j-1; i >= 0 && j >= 0 && bitMatches == bitsPerInt; i, j = i-1, j-1 {
seqDiff = seqOne[i] ^ seqTwo[j]
bitMatches = bits.TrailingZeros64(seqDiff)
matches += bitMatches / bitsPerBase
}
return matches
}