-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_flat_table.sh
executable file
·190 lines (155 loc) · 5.66 KB
/
generate_flat_table.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#!/bin/bash
set -e
# default setting
scale=1
partition_interval="1 year"
ssb_database_name=""
enum_table_name=f_lz4_mars2
access_method="mars3"
function show_help()
{
cat << EOF
Generate fat table with dataset in given scale.
Args:
-h
Show help message.
-D [custom_database_name]
Specify custom database name to generate a flatten table.
-s [scale]
Required, scale of the generated dataset in gigabytes(GB), and specify this option to
create the flatten along with desired dataset.
-i [partition_interval]
Optional, default '1 year', specify custom partition interval.
-a [access_method]
Optional, default 'mars3', support "mars2,mars3"
Usage:
Generate flat table with 1GB dataset.
./generate_flat_table.sh -s 1
Generate flat table with monthly partition.
./generate_flat_table.sh -s 1 -i '1 month'
EOF
}
function parse_args()
{
OPTIND=1
while getopts ":h :o:D:i:m:p:e:t:c:s:a:x" opt; do
case "$opt" in
s) scale="$OPTARG" ;;
i)
partition_interval="$OPTARG";;
D) ssb_database_name="$OPTARG";;
h)
show_help
exit 0
;;
a) access_method="$OPTARG";;
\?)
printf "%s\n" "Invalid Option! -$OPTARG" >&2
exit 1
;;
:)
printf "%s\n" "-$OPTARG requires an argument" >&2
exit 1
;;
esac
done
# Check whether access method is valid
case $access_method in
'mars2' | 'mars3') ;;
*)
printf "%s\n" "Invalid access method! -$access_method" >&2
exit 1
;;
esac
shift "$((OPTIND - 1))"
}
parse_args $@
if [ "$scale" -gt "1000" ]; then
echo "[WARN] You specified scale=${scale} larger than 1000GB, which is still an experimental feature."
fi
# Separated database name for given scale.
if [ -z "${ssb_database_name}" ]; then
ssb_database_name="ssb_scale_${scale}"
fi
if [ "$( psql -Aqt -P pager=off -d postgres -c "SELECT 1 FROM pg_database WHERE datname='${ssb_database_name}'" )" = '1' ]
then
echo ""
else
echo "[ERROR] database ${ssb_database_name} does not exist please generate and import the dataset before generate the flatten table..."
exit 1
fi
if [ "$( psql -Aqt -P pager=off -d ${ssb_database_name} -c "SELECT 1 FROM pg_class WHERE relname='${enum_table_name}'" )" = '1' ]
then
echo "[ERROR] table [${enum_table_name}] exists in database [${ssb_database_name}], you may drop this table or use a custom database."
echo ""
exit 1
fi
curdir=$(pwd)
internal_dir="$curdir/generate_flat_table"
# dynamic int vs bigint against different scale=100..1000,
# using bigint for scale > 100
dynaint="int"
if [ "$scale" -gt "100" ]; then
dynaint=bigint
fi
echo "Creating enum types..."
psql -Aqtbe -P pager=off -v ON_ERROR_STOP=ON -d $ssb_database_name -c "drop type if exists e_c_nation cascade; create type e_c_nation as enum ('ALGERIA', 'ARGENTINA', 'BRAZIL', 'CANADA', 'CHINA', 'EGYPT', 'ETHIOPIA', 'FRANCE', 'GERMANY', 'INDIA', 'INDONESIA', 'IRAN', 'IRAQ', 'JAPAN', 'JORDAN', 'KENYA', 'MOROCCO', 'MOZAMBIQUE', 'PERU', 'ROMANIA', 'RUSSIA',
'SAUDI ARABIA', 'UNITED KINGDOM', 'UNITED STATES', 'VIETNAM')"
psql -Aqtbe -P pager=off -v ON_ERROR_STOP=ON -d $ssb_database_name -c "drop type if exists e_c_region cascade; create type e_c_region as enum ('AFRICA', 'AMERICA', 'ASIA', 'EUROPE', 'MIDDLE EAST');"
psql -Aqtbe -P pager=off -v ON_ERROR_STOP=ON -d $ssb_database_name -c "drop type if exists e_p_mfgr cascade; create type e_p_mfgr as enum ('MFGR#1', 'MFGR#2', 'MFGR#3', 'MFGR#4', 'MFGR#5');"
# mars extension is required before populating a table using mars2 engine.
psql -Aqtbe -P pager=off -v ON_ERROR_STOP=ON -d $ssb_database_name -c "create extension if not exists matrixts"
# create table with enum columns
psql -Aqtbe -P pager=off -v ON_ERROR_STOP=ON \
-d $ssb_database_name \
-v withopts= \
-v tname=${enum_table_name} \
-v dynaint=${dynaint} \
-v par="'${partition_interval}'" \
-v e_lo_orderpriority=text \
-v e_lo_shipmode=text \
-v e_c_city=text \
-v e_c_nation=e_c_nation \
-v e_c_region=e_c_region \
-v e_c_mktsegment=text \
-v e_s_city=text \
-v e_s_nation=e_c_nation \
-v e_s_region=e_c_region \
-v e_p_mfgr=e_p_mfgr \
-v e_p_category=text \
-v e_p_brand=text \
-v e_p_color=text \
-v e_p_type=text \
-v e_p_container=text \
-f ${internal_dir}/${access_method}/ddl.sql
psql -Aqtbe -P pager=off -v ON_ERROR_STOP=ON \
-d $ssb_database_name \
-v withopts= \
-v dynaint=${dynaint} \
-v tname=${enum_table_name} \
-v e_lo_orderpriority=text \
-v e_lo_shipmode=text \
-v e_c_city=text \
-v e_c_nation=e_c_nation \
-v e_c_region=e_c_region \
-v e_c_mktsegment=text \
-v e_s_city=text \
-v e_s_nation=e_c_nation \
-v e_s_region=e_c_region \
-v e_p_mfgr=e_p_mfgr \
-v e_p_category=text \
-v e_p_brand=text \
-v e_p_color=text \
-v e_p_type=text \
-v e_p_container=text \
-f ${internal_dir}/${access_method}/prejoin.sql
psql -Aqtbe -P pager=off -v ON_ERROR_STOP=ON -d $ssb_database_name \
-c "ANALYZE ${enum_table_name}"
psql -Aqtbe -P pager=off -v ON_ERROR_STOP=ON -d $ssb_database_name \
-c "VACUUM FULL ${enum_table_name};"
echo ""
cat << EOF
Flatten table ${enum_table_name} is successfully generated in database ${ssb_database_name}.
Now you can run SSB benchmark.
./ssb.sh -s ${scale} -D ${ssb_database_name}
EOF