// Source: gettype.go (111 lines, 91 loc, 3.21 KB), from a fork of xitongsys/parquet-go.

package schema
import (
"reflect"
"fmt"
"github.com/xitongsys/parquet-go/parquet"
"github.com/xitongsys/parquet-go/types"
)
// GetTypes derives a Go reflect.Type for every element of the schema.
//
// The returned slice is parallel to self.SchemaElements: entry i holds the
// type computed for SchemaElements[i].
//
//   - A leaf (no children) is mapped through types.ParquetTypeToGoReflectType;
//     a REPEATED leaf becomes a slice of the element type.
//   - A group annotated LIST whose only child is "List" with a single child
//     "Element" becomes a slice of the Element type.
//   - A group annotated MAP whose only child is "Key_value" with exactly the
//     children "Key" and "Value" becomes a map of those two types.
//   - Any other group becomes an anonymous struct with one field per child;
//     it is wrapped in a pointer when OPTIONAL and in a slice when REPEATED.
func (self *SchemaHandler) GetTypes() []reflect.Type {
	ln := int32(len(self.SchemaElements))
	// children[i] lists the schema indexes of the direct children of element i.
	// Entries start out nil; append and len work on nil slices.
	children := make([][]int32, ln)
	elementTypes := make([]reflect.Type, ln)

	var pos int32
	// Each stack item is {schema index, number of children not yet visited}.
	var stack [][2]int32
	for pos < ln || len(stack) > 0 {
		if len(stack) == 0 || stack[len(stack)-1][1] > 0 {
			// Descend: the element at pos is the next child of the stack top
			// (or a root when the stack is empty).
			if len(stack) > 0 {
				top := &stack[len(stack)-1]
				top[1]--
				children[top[0]] = append(children[top[0]], pos)
			}
			stack = append(stack, [2]int32{pos, self.SchemaElements[pos].GetNumChildren()})
			pos++
			continue
		}

		// All children of the stack top have been typed already, so its own
		// type can now be built from theirs.
		last := len(stack) - 1
		idx := stack[last][0]
		elem := self.SchemaElements[idx]
		pT, cT, rT := elem.Type, elem.ConvertedType, elem.RepetitionType

		if elem.GetNumChildren() == 0 {
			// A nil repetition type is treated like any non-REPEATED one,
			// matching how the struct case below handles a nil rT.
			if rT != nil && *rT == parquet.FieldRepetitionType_REPEATED {
				elementTypes[idx] = reflect.SliceOf(types.ParquetTypeToGoReflectType(pT, nil))
			} else {
				elementTypes[idx] = types.ParquetTypeToGoReflectType(pT, rT)
			}
		} else {
			kids := children[idx]
			// The LIST and MAP layouts both require exactly one intermediate
			// child; grandkids stays nil otherwise so the cases below fail.
			var grandkids []int32
			if len(kids) == 1 {
				grandkids = children[kids[0]]
			}

			switch {
			case cT != nil && *cT == parquet.ConvertedType_LIST &&
				len(kids) == 1 &&
				self.GetInName(int(kids[0])) == "List" &&
				len(grandkids) == 1 &&
				self.GetInName(int(grandkids[0])) == "Element":
				elementTypes[idx] = reflect.SliceOf(elementTypes[grandkids[0]])

			case cT != nil && *cT == parquet.ConvertedType_MAP &&
				len(kids) == 1 &&
				self.GetInName(int(kids[0])) == "Key_value" &&
				len(grandkids) == 2 &&
				self.GetInName(int(grandkids[0])) == "Key" &&
				self.GetInName(int(grandkids[1])) == "Value":
				elementTypes[idx] = reflect.MapOf(elementTypes[grandkids[0]], elementTypes[grandkids[1]])

			default:
				fields := make([]reflect.StructField, 0, len(kids))
				for _, ci := range kids {
					fields = append(fields, reflect.StructField{
						Name: self.Infos[ci].InName,
						Type: elementTypes[ci],
					})
				}
				structType := reflect.StructOf(fields)
				switch {
				case rT == nil || *rT == parquet.FieldRepetitionType_REQUIRED:
					elementTypes[idx] = structType
				case *rT == parquet.FieldRepetitionType_OPTIONAL:
					// PtrTo yields the same type as reflect.New(structType).Type()
					// without allocating a value.
					elementTypes[idx] = reflect.PtrTo(structType)
				case *rT == parquet.FieldRepetitionType_REPEATED:
					elementTypes[idx] = reflect.SliceOf(structType)
				}
			}
		}
		stack = stack[:last]
	}
	return elementTypes
}
// GetType returns the Go reflect.Type computed by GetTypes for the schema
// element addressed by prefixPath.
//
// prefixPath is first converted with ConvertToInPathStr and the result is
// looked up in MapIndex. An error is returned when the conversion fails or
// when the converted path has no entry in MapIndex.
func (self *SchemaHandler) GetType(prefixPath string) (reflect.Type, error) {
	prefixPath, err := self.ConvertToInPathStr(prefixPath)
	if err != nil {
		return nil, err
	}
	// Resolve the path before building the type table, so an unknown path
	// does not pay for a full GetTypes pass.
	idx, ok := self.MapIndex[prefixPath]
	if !ok {
		return nil, fmt.Errorf("[GetType] Can't find %v", prefixPath)
	}
	return self.GetTypes()[idx], nil
}