/
diskstore_util.go
168 lines (145 loc) · 7.34 KB
/
diskstore_util.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
// Copyright (c) 2017-2018 Uber Technologies, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package diskstore
import (
"fmt"
"path/filepath"
"strconv"
"strings"
"github.com/uber/aresdb/utils"
)
// Directory names used when building disk store paths. data is joined
// directly under the path prefix; the remaining names are joined under a
// table shard directory.
const (
	data           string = "data"
	redologs       string = "redologs"
	snapshots      string = "snapshots"
	archiveBatches string = "archiving_batches"
)
// Utils for data hierarchy layout.
// Following this wiki:
// https://github.com/uber/aresdb/wiki/data_disk_layout
// General path related utils for disk store.
// getPathForTableShard is used to get the directory to store a table shard given path prefix, table name and shard id.
func getPathForTableShard(prefix, table string, shardID int) string {
tableShardDirPath := fmt.Sprintf("%s_%d", table, shardID)
return filepath.Join(prefix, data, tableShardDirPath)
}
// Redologs Utils
// Path on disk:
// {root_path}/data/{table_name}_{shard_id}/redologs/{creation_time}.redolog
//
// Sample:
// /var/gForceDb/data/myTable_0/redologs/1499971253.redolog
// /var/gForceDb/data/myTable_1/redologs/1499970221.redolog
// GetPathForTableRedologs returns the redologs directory of a table shard,
// i.e. the "redologs" sub-directory of the shard directory derived from the
// path prefix, table name and shard id.
func GetPathForTableRedologs(prefix, table string, shardID int) string {
	return filepath.Join(getPathForTableShard(prefix, table, shardID), redologs)
}
// GetPathForRedologFile returns the on-disk path of a single redolog file.
// The file is named "{creationTime}.redolog" and lives in the redologs
// directory of the given table shard.
func GetPathForRedologFile(prefix, table string, shardID int, creationTime int64) string {
	fileName := strconv.FormatInt(creationTime, 10) + ".redolog"
	return filepath.Join(GetPathForTableRedologs(prefix, table, shardID), fileName)
}
// Snapshot Utils
//Path on disk:
// {root_path}/data/{table_name}_{shard_id}/snapshots/{redo_log}_{offset}/{batchID}/{columnID}.data
//
//Sample:
// /var/gForceDb/data/myTable_0/snapshots/1499970253_200/-2147483648/1.data
// /var/gForceDb/data/myTable_1/snapshots/1499970221_300/-2147483648/2.data
// GetPathForTableSnapshotDir returns the directory under which all snapshots
// of a table shard are stored, i.e. the "snapshots" sub-directory of the
// shard directory.
func GetPathForTableSnapshotDir(prefix, table string, shardID int) string {
	return filepath.Join(getPathForTableShard(prefix, table, shardID), snapshots)
}
// GetPathForTableSnapshotDirPath returns the directory of one snapshot of a
// table shard. The directory is named "{redoLogFile}_{offset}" and is placed
// inside the shard's snapshots directory.
func GetPathForTableSnapshotDirPath(prefix, table string, shardID int, redoLogFile int64, offset uint32) string {
	return filepath.Join(
		GetPathForTableSnapshotDir(prefix, table, shardID),
		fmt.Sprintf("%d_%d", redoLogFile, offset),
	)
}
// GetPathForTableSnapshotBatchDir returns the directory of one batch inside a
// snapshot. The directory is named after the decimal batchID and is placed
// inside the snapshot directory identified by redoLogFile and offset.
func GetPathForTableSnapshotBatchDir(prefix, table string, shardID int, redoLogFile int64, offset uint32,
	batchID int) string {
	batchDirName := strconv.Itoa(batchID)
	return filepath.Join(GetPathForTableSnapshotDirPath(prefix, table, shardID, redoLogFile, offset), batchDirName)
}
// GetPathForTableSnapshotColumnFilePath returns the path of one column file
// inside a snapshot batch. The file is named "{columnID}.data" and is placed
// inside the snapshot batch directory identified by redoLogFile, offset and
// batchID.
func GetPathForTableSnapshotColumnFilePath(prefix, table string, shardID int, redoLogFile int64, offset uint32,
	batchID, columnID int) string {
	columnFileName := strconv.Itoa(columnID) + ".data"
	return filepath.Join(
		GetPathForTableSnapshotBatchDir(prefix, table, shardID, redoLogFile, offset, batchID),
		columnFileName,
	)
}
// Archive batches Utils
// Path on disk:
// {root_path}/data/{table_name}_{shard_id}/archiving_batches/{batch_id}_{batch_version}[-{seq_num}]
// {root_path}/data/{table_name}_{shard_id}/archiving_batches/{batch_id}_{batch_version}[-{seq_num}]/{columnID}.data
// Note:
// batch_id is the UTC date.
// batch_version is the cutoff time in seconds since the Unix epoch.
// The "-{seq_num}" suffix is only present when seq_num is non-zero.
//
// Sample:
// /var/gForceDb/data/myTable_0/archiving_batches/2017-07-19_1499971253/1.data
// /var/gForceDb/data/myTable_0/archiving_batches/2017-07-19_1499971253/2.data
// GetPathForTableArchiveBatchRootDir returns the root directory that holds
// all archive batches of a table shard, i.e. the "archiving_batches"
// sub-directory of the shard directory.
func GetPathForTableArchiveBatchRootDir(prefix, table string, shardID int) string {
	return filepath.Join(getPathForTableShard(prefix, table, shardID), archiveBatches)
}
// GetPathForTableArchiveBatchDir is used to get the dir path of an archive batch version given path prefix, table name, shard id, batch id and batch version.
func GetPathForTableArchiveBatchDir(prefix, table string, shardID int, batchID string, batchVersion uint32, seqNum uint32) string {
tableShardPath := getPathForTableShard(prefix, table, shardID)
var batchIDAndVersionPath string
// largest uint32 indicates there's no seqNum
if seqNum != 0 {
batchIDAndVersionPath = fmt.Sprintf("%s_%d-%d", batchID, batchVersion, seqNum)
} else {
batchIDAndVersionPath = fmt.Sprintf("%s_%d", batchID, batchVersion)
}
return filepath.Join(tableShardPath, archiveBatches, batchIDAndVersionPath)
}
// GetPathForTableArchiveBatchColumnFile returns the path of one column file
// inside an archive batch version. The file is named "{columnID}.data" and is
// placed inside the archive batch directory identified by batchID,
// batchVersion and seqNum.
func GetPathForTableArchiveBatchColumnFile(prefix, table string, shardID int, batchID string, batchVersion uint32, seqNum uint32, columnID int) string {
	return filepath.Join(
		GetPathForTableArchiveBatchDir(prefix, table, shardID, batchID, batchVersion, seqNum),
		fmt.Sprintf("%d.data", columnID),
	)
}
// ParseBatchIDAndVersionName parses an archive batch directory name of the
// form "{batchID}_{batchVersion}" or "{batchID}_{batchVersion}-{seqNum}".
//
// It returns the batch ID, the batch version and the sequence number, in that
// order. The sequence number is 0 when the name has no "-{seqNum}" suffix.
//
// An error is returned when the name does not contain exactly one "_", or
// when the batch version or sequence number is not a base-10 number that fits
// in a uint32. On error the other results are zero values.
func ParseBatchIDAndVersionName(batchIDAndVersion string) (string, uint32, uint32, error) {
	splits := strings.Split(batchIDAndVersion, "_")
	if len(splits) != 2 {
		return "", 0, 0, utils.StackError(nil, "Failed to parsed batch ID and version: %s", batchIDAndVersion)
	}
	batchID, versionStr := splits[0], splits[1]

	var seqNum uint64
	// Split on the first "-" only, so a malformed name with additional "-"
	// segments fails number parsing instead of being silently accepted.
	if idx := strings.Index(versionStr, "-"); idx >= 0 {
		// bitSize 32: values that do not fit the returned uint32 are reported
		// as errors rather than truncated by the conversion below.
		seq, err := strconv.ParseUint(versionStr[idx+1:], 10, 32)
		if err != nil {
			return "", 0, 0, utils.StackError(err, "Failed to parse batch seqNum: %s", batchIDAndVersion)
		}
		seqNum = seq
		versionStr = versionStr[:idx]
	}

	batchVersion, err := strconv.ParseUint(versionStr, 10, 32)
	if err != nil {
		// Keep the underlying parse error so callers can see why parsing failed.
		return "", 0, 0, utils.StackError(err, "Failed to parsed batch version: %s", batchIDAndVersion)
	}
	return batchID, uint32(batchVersion), uint32(seqNum), nil
}