-
Notifications
You must be signed in to change notification settings - Fork 4
/
filecache.go
79 lines (72 loc) · 2.7 KB
/
filecache.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
package embed
import (
	"errors"
	"fmt"
	"io"
	"io/fs"

	"github.com/tzapio/tzap/internal/logging/tl"
	"github.com/tzapio/tzap/pkg/types"
)
// FilestampCache remembers the modification timestamp of files so that later
// runs can tell which files changed since their timestamp was last stored.
type FilestampCache struct {
// filesTimestampsDB is keyed by file path; values are modification times
// stored as Unix nanoseconds.
filesTimestampsDB types.DBCollectionInterface[int64]
}
// NewFilestampCache builds a FilestampCache that reads and writes file
// timestamps through the given collection.
func NewFilestampCache(filesTimestampsDB types.DBCollectionInterface[int64]) *FilestampCache {
	cache := new(FilestampCache)
	cache.filesTimestampsDB = filesTimestampsDB
	return cache
}
// CheckFileCache compares the current modification time of each file against
// the timestamp stored in the cache and sorts the files into two maps.
//
// changedFiles maps a file path to the file's current content. It holds files
// with no cached timestamp and files whose timestamp differs significantly
// from the cached one. A file that no longer exists is reported as changed
// with empty content.
//
// unchangedFiles maps a file path to its cached timestamp (Unix nanoseconds).
//
// Files that cannot be stat'ed, opened or read are logged and left out of both
// maps.
func (fc *FilestampCache) CheckFileCache(files []types.FileReader) (changedFiles map[string]string, unchangedFiles map[string]int64) {
	tl.Logger.Println("Checking file cache. Files:", len(files))
	changedFiles = map[string]string{}
	unchangedFiles = map[string]int64{}

	for _, file := range files {
		fileName := file.FilePath()

		fileStats, statErr := file.Stat()
		if errors.Is(statErr, fs.ErrNotExist) {
			tl.Logger.Println("File does not exist:", fileName)
			changedFiles[fileName] = ""
			continue
		}
		if statErr != nil {
			// Any other Stat failure leaves fileStats unusable; skip the file
			// instead of dereferencing it below.
			tl.Logger.Printf("Could not stat file %s: %v", fileName, statErr)
			continue
		}

		currentEditTime := fileStats.ModTime().UnixNano()
		cachedEditTime, exists := fc.filesTimestampsDB.Get(fileName)
		if exists && !isTimeDiffSignificant(currentEditTime, cachedEditTime) {
			tl.DeepLogger.Printf("NO CHANGE %s. Old Edittime: %d, New Edittime: %d, TimeDiff: %d", fileName, cachedEditTime, currentEditTime, cachedEditTime-currentEditTime)
			unchangedFiles[fileName] = cachedEditTime
			continue
		}

		readCloser, err := file.Open()
		if err != nil {
			tl.Logger.Printf("Could not open file %s: %v", fileName, err)
			continue
		}
		fileContent, readErr := io.ReadAll(readCloser)
		// Close right away rather than deferring: a defer inside this loop
		// would keep every opened file alive until the function returns.
		if closeErr := readCloser.Close(); closeErr != nil {
			tl.Logger.Printf("Could not close file %s: %v", fileName, closeErr)
		}
		if readErr != nil {
			tl.Logger.Printf("Could not read file %s: %v", fileName, readErr)
			continue
		}

		tl.Logger.Printf("File %s has changed. Old Edittime: %d, New Edittime: %d, TimeDiff: %d", fileName, cachedEditTime, currentEditTime, cachedEditTime-currentEditTime)
		changedFiles[fileName] = string(fileContent)
	}

	tl.Logger.Println("Finished checking file cache. Changed files:", len(changedFiles), "Unchanged files:", len(unchangedFiles))
	return changedFiles, unchangedFiles
}
// CacheFilestamps stores the current modification time (Unix nanoseconds) of
// every file in files whose path matches the metadata filename of at least one
// embedding vector. All timestamps are written in a single batch.
//
// Each matching file is stat'ed and stored once, even when several vectors
// refer to it. Nil or empty embeddings are a no-op.
//
// It returns an error when a matching file cannot be stat'ed or when the batch
// write fails; both errors wrap the underlying cause.
func (fc *FilestampCache) CacheFilestamps(embeddings *types.Embeddings, files []types.FileReader) error {
	if embeddings == nil || len(embeddings.Vectors) == 0 {
		return nil
	}

	// Index the readers by path once so each vector is resolved with a map
	// lookup instead of a scan over all files. The first reader seen for a
	// path wins.
	readers := make(map[string]types.FileReader, len(files))
	for _, fileReader := range files {
		path := fileReader.FilePath()
		if _, dup := readers[path]; !dup {
			readers[path] = fileReader
		}
	}

	var keyvals []types.KeyValue[int64]
	seen := make(map[string]struct{})
	for _, vector := range embeddings.Vectors {
		filename := vector.Metadata.Filename
		if _, done := seen[filename]; done {
			// Several vectors can point at the same file; one entry is enough.
			continue
		}
		seen[filename] = struct{}{}

		fileReader, ok := readers[filename]
		if !ok {
			continue
		}
		fileStat, err := fileReader.Stat()
		if err != nil {
			return fmt.Errorf("stat %q: %w", filename, err)
		}
		keyvals = append(keyvals, types.KeyValue[int64]{Key: filename, Value: fileStat.ModTime().UnixNano()})
	}

	added, err := fc.filesTimestampsDB.BatchSet(keyvals)
	if err != nil {
		// Report the failure to the caller (with its cause) instead of
		// panicking; the timestamp store may be left incomplete.
		return fmt.Errorf("storing file timestamps: %w", err)
	}
	tl.Logger.Printf("Added %d files to file cache. Total: %d", added, len(embeddings.Vectors))
	return nil
}