-
Notifications
You must be signed in to change notification settings - Fork 0
/
snparray.go
77 lines (66 loc) · 1.92 KB
/
snparray.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
/* SPDX-License-Identifier: MPL-2.0
*
* Zymatik Nucleo - A Bioinformatics library for Go.
* Copyright (C) 2024 Damian Peckett <damian@pecke.tt>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Mozilla Public License v2.0.
*
* You should have received a copy of the Mozilla Public License v2.0
* along with this program. If not, see <https://mozilla.org/MPL/2.0/>.
*/
// Package snparray provides readers for common SNP array file formats, such as
// those produced by direct-to-consumer genetic testing services like 23andMe
// and AncestryDNA.
package snparray
import (
"bytes"
"fmt"
"io"
"github.com/zymatik-com/genobase/types"
)
// SNP is a single record produced by a Reader: one SNP read from a SNP array
// file.
type SNP struct {
// RSID is the identifier of the SNP.
// NOTE(review): presumably a dbSNP reference ID such as "rs12345" — confirm
// against the codec implementations.
RSID string
// Chromosome is the chromosome the SNP is located on.
Chromosome types.Chromosome
// Position is the location of the SNP on the chromosome.
// NOTE(review): the coordinate base (0- or 1-based) is not visible here;
// presumably it is relative to the assembly reported by Reader.Reference —
// confirm.
Position int64
// Genotype is the genotype call for the SNP, as text.
// NOTE(review): the exact encoding (e.g. "AG", no-call markers) is defined
// by the codecs and is not visible here — confirm.
Genotype string
}
// Codec recognizes and opens one SNP array file format.
type Codec interface {
// Detect reports whether the data readable from r is in this codec's
// format. The package-level Open calls Detect with only the leading bytes
// of the input (at most 1024), so an implementation cannot rely on seeing
// the whole file. A non-nil error aborts format detection in Open.
Detect(r io.Reader) (bool, error)
// Open opens the SNP array file readable from r and returns a lazy SNP
// reader over it.
Open(r io.Reader) (Reader, error)
}
// Reader is a lazy SNP reader: SNPs are obtained one at a time by calling
// Read.
type Reader interface {
// Reference returns the reference assembly used by the SNP array.
Reference() types.Reference
// Read reads the next SNP from the file. It returns io.EOF if there are no
// more SNPs.
Read() (*SNP, error)
}
// codecs lists the supported file formats. Order matters: Open asks each
// codec in turn and uses the first one whose Detect returns true.
// NOTE(review): presumably the generic CSV/TSV codecs come last so that the
// vendor-specific formats take precedence — confirm.
var codecs = []Codec{
&twentyThreeAndMeCodec{},
&ancestryDNACodec{},
&genericCSVCodec{},
&genericTSVCodec{},
}
// Open detects the format of the SNP array file readable from r and returns a
// lazy SNP reader for it.
//
// Up to 1024 leading bytes are consumed from r and offered to each registered
// codec in order; the first codec whose Detect returns true opens the file.
// The consumed bytes are replayed in front of the remainder of r, so the
// codec sees the input from its very first byte.
//
// Open returns io.EOF if r yields no data, any error encountered while reading
// the leading bytes, any error returned by a codec's Detect or Open, and an
// "unknown snparray format" error if no codec recognizes the input.
func Open(r io.Reader) (Reader, error) {
	// peekSize is the maximum number of leading bytes used for detection.
	const peekSize = 1024

	// Peek at the first few lines to determine the file format. A single
	// r.Read call may legally return fewer bytes than are available, or
	// return data together with io.EOF, so keep reading until either
	// peekSize bytes have been collected or the input ends.
	head, err := io.ReadAll(io.LimitReader(r, peekSize))
	if err != nil {
		return nil, err
	}
	if len(head) == 0 {
		// Empty input: keep reporting io.EOF, as callers may test for it.
		return nil, io.EOF
	}

	for _, codec := range codecs {
		matched, detectErr := codec.Detect(bytes.NewReader(head))
		if detectErr != nil {
			return nil, detectErr
		}
		if matched {
			// Stitch the already-consumed prefix back onto the stream.
			return codec.Open(io.MultiReader(bytes.NewReader(head), r))
		}
	}

	return nil, fmt.Errorf("unknown snparray format")
}