-
-
Notifications
You must be signed in to change notification settings - Fork 24
/
Copy pathget_bigquery_dates.sh
executable file
·75 lines (69 loc) · 1.35 KB
/
get_bigquery_dates.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/bin/bash
#
# Gets a list of dates for a given BigQuery table.
#
# Example usage:
#
# sql/get_bigquery_dates.sh har lighthouse
#
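# Where the first argument is the dataset and the
# second argument is the table suffix. Optional third
# and fourth arguments give an inclusive minimum and
# maximum date (e.g. 2017_06_01) to limit the output.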
#
# Example output:
#
# 2017_08_15
# 2017_08_01
# 2017_07_15
# 2017_07_01
# 2017_06_15
# 2017_06_01
#
# May be combined with the generate_reports.sh script
# to generate a histogram for each date. For example:
#
# sql/get_bigquery_dates.sh runs pages | \
# xargs -I date sql/generate_reports.sh -h date
#
# Abort on the first failing command, and make a pipeline fail when any of
# its stages fails (not only the last one).
set -eo pipefail

# Positional arguments:
#   $1 DATASET - dataset to inspect (required)
#   $2 SUFFIX  - table suffix to match (optional)
#   $3 MIN     - inclusive lower bound on the reported dates (optional)
#   $4 MAX     - inclusive upper bound on the reported dates (optional)
DATASET=$1
SUFFIX=$2
MIN=$3
MAX=$4

# The dataset is the only mandatory argument; without it no table can be
# addressed, so report the problem on stderr and stop.
if [[ -z "$DATASET" ]]; then
  echo "Dataset argument required." >&2
  echo "Example usage: sql/get_bigquery_dates.sh har lighthouse" >&2
  exit 1
fi
#######################################
# Builds the optional HAVING clause that restricts the reported dates.
# Globals:
#   having (written) - empty when no bound is given, otherwise the clause
#                      text terminated by a newline
# Arguments:
#   $1 - inclusive lower bound for the date, may be empty
#   $2 - inclusive upper bound for the date, may be empty
# Returns:
#   0 always
# Notes:
#   The result is stored in a variable rather than printed because a
#   command substitution would strip the trailing newline.
#   The bounds are interpolated verbatim into the SQL text; they are not
#   escaped or validated here.
#######################################
build_having_clause() {
  local min_date=${1:-}
  local max_date=${2:-}
  local nl=$'\n'
  local conditions=""

  if [[ -n "$min_date" ]]; then
    conditions=" date >= \"${min_date}\""
  fi
  if [[ -n "$max_date" ]]; then
    # Join with AND only when a lower bound is already present.
    conditions+="${conditions:+ AND${nl}} date <= \"${max_date}\""
  fi

  having=""
  if [[ -n "$conditions" ]]; then
    having="HAVING${nl}${conditions}${nl}"
  fi
}

build_having_clause "$MIN" "$MAX"
# Assemble the standard-SQL statement, one array element per line of SQL.
# The query reads every table matching httparchive.<DATASET>.20*, keeps
# those whose _TABLE_SUFFIX matches the LIKE pattern built from SUFFIX,
# and returns one row per date ('20' followed by the first 8 characters of
# the table suffix), newest first. $having is either empty or a complete
# HAVING clause ending in a newline, so it is placed directly before
# ORDER BY.
# NOTE(review): "_" is a single-character wildcard in LIKE, so the pattern
# accepts any character in front of SUFFIX - confirm this is intended.
sql_lines=(
  "#standardSQL"
  "SELECT"
  "CONCAT('20', SUBSTR(_TABLE_SUFFIX, 0, 8)) AS date"
  "FROM"
  "\`httparchive.${DATASET}.20*\`"
  "WHERE"
  "_TABLE_SUFFIX LIKE '%_${SUFFIX}%'"
  "GROUP BY"
  "date"
  "${having}ORDER BY"
  "date DESC"
)
printf -v query '%s\n' "${sql_lines[@]}"
# Remove the newline printf added after the last line so the text has no
# trailing newline.
query=${query%$'\n'}
# Feed the query to the bq CLI on stdin and request CSV output, then skip
# the first line of that output (the column header) so that only the
# resulting dates are printed.
printf '%s\n' "$query" \
  | bq --quiet --format csv --project_id httparchive query --max_rows 10000 \
  | tail -n +2