-
Notifications
You must be signed in to change notification settings - Fork 4
/
export.go
82 lines (75 loc) · 1.89 KB
/
export.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
package export
import (
"encoding/json"
"fmt"
"os"
"github.com/tzapio/tzap/internal/logging/tl"
"github.com/tzapio/tzap/pkg/types"
)
// ExportEmbeddingToFile persists e to the ./.tzap-data directory: first as
// batch files via BatchEmbeddings, then as the single combined file
// ./.tzap-data/files.json via ExportVectorsToFile.
//
// It has no error return; if either step fails it panics with that step's
// error. The combined file is not written when batching fails.
func ExportEmbeddingToFile(e *types.Embeddings) {
	err := BatchEmbeddings(e)
	if err == nil {
		err = ExportVectorsToFile(e, "./.tzap-data/files.json")
	}
	if err != nil {
		panic(err)
	}
}
// ExportVectorsToFile serializes e as JSON and writes it to filePath with
// permission bits 0644.
//
// It returns the json.Marshal error if encoding fails, otherwise whatever
// os.WriteFile returns (nil on success). Errors are passed through unwrapped.
func ExportVectorsToFile(e *types.Embeddings, filePath string) error {
	payload, marshalErr := json.Marshal(e)
	if marshalErr != nil {
		return marshalErr
	}
	return os.WriteFile(filePath, payload, 0644)
}
// BatchEmbeddings writes e.Vectors to disk in consecutive chunks of at most
// 100 vectors. Each chunk is wrapped in its own types.Embeddings value and
// exported with ExportVectorsToFile to ./.tzap-data/files-<n>.json, where n
// starts at 1 and increases by one per chunk.
//
// Batch files from an earlier run are removed once, up front, via
// deletePreviousBatch. The cleanup must not run while batches are being
// written: it deletes files-1.json, files-2.json, ... until the first missing
// index, so running it mid-export would erase output that was just produced.
//
// It returns the first error from the cleanup or from writing a batch; batches
// written before the failure are left on disk. When e.Vectors is empty no
// batch file is written.
func BatchEmbeddings(e *types.Embeddings) error {
	const batchSize = 100

	if err := deletePreviousBatch(); err != nil {
		return err
	}

	total := len(e.Vectors)
	batchNumber := 1
	for start := 0; start < total; start += batchSize {
		// The final chunk may be shorter than batchSize.
		end := start + batchSize
		if end > total {
			end = total
		}

		filePath := fmt.Sprintf("./.tzap-data/files-%d.json", batchNumber)
		batch := &types.Embeddings{
			Vectors: e.Vectors[start:end],
		}
		if err := ExportVectorsToFile(batch, filePath); err != nil {
			return err
		}
		batchNumber++
	}
	return nil
}
// deletePreviousBatch removes previously written batch files
// ./.tzap-data/files-1.json, ./.tzap-data/files-2.json, ... in ascending
// order, stopping at the first index whose file does not exist. Files that
// sit beyond a gap in the numbering are therefore left in place.
//
// It returns nil when the scan ends on a missing file (including when the
// directory itself is missing), or the first removal error otherwise.
func deletePreviousBatch() error {
	for i := 1; ; i++ {
		filePath := fmt.Sprintf("./.tzap-data/files-%d.json", i)
		// Remove directly instead of stat-then-remove: one syscall per file
		// and no window in which the file can vanish between check and delete.
		err := os.Remove(filePath)
		if err == nil {
			continue
		}
		if os.IsNotExist(err) {
			return nil
		}
		return err
	}
}
// GetEmbeddingsFromFile reads the file at filePath and decodes its JSON
// content into a types.Embeddings value. The path is logged through
// tl.Logger before reading.
//
// On success it returns a pointer to the decoded value and a nil error. If
// reading or decoding fails it returns the underlying error unwrapped,
// together with a pointer to an empty types.Embeddings (never nil).
func GetEmbeddingsFromFile(filePath string) (*types.Embeddings, error) {
	tl.Logger.Println("Getting embeddings from file", filePath)
	filecontent, err := os.ReadFile(filePath)
	if err != nil {
		return &types.Embeddings{}, err
	}
	var embeddings types.Embeddings
	if err := json.Unmarshal(filecontent, &embeddings); err != nil {
		return &types.Embeddings{}, err
	}
	return &embeddings, nil
}