-
Notifications
You must be signed in to change notification settings - Fork 0
/
pushToBigQuery
184 lines (162 loc) · 5.57 KB
/
pushToBigQuery
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
'use strict';
// refer to: https://www.youtube.com/watch?v=MonQPFuFKSk
/**
* Creates a menu in the UI.
*/
function onOpen() {
SpreadsheetApp.getUi()
.createMenu('__doThangs__')
.addItem('Upload', 'run_bq_pushes')
.addToUi();
}
/**
* Function to run from the UI menu.
*
* Uploads the sheets defined in the active sheet into BigQuery.
*/
function run_bq_pushes() {
SpreadsheetApp.getActiveSpreadsheet()
.toast("Pushing changes to bigquery", "Hello", 200);
// Column indices.
const SHEET_URL = 0;
const PROJECT_ID = 1;
const DATASET_ID = 2;
const TABLE_ID = 3;
const APPEND = 4;
const STATUS = 5;
// Get the data range rows, skipping the header (first) row.
//let sheet = SpreadsheetApp.getActiveSheet();
let spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
let sheet = spreadsheet.getSheetByName('bqPush_');
let rows = sheet.getDataRange().getValues().slice(1);
// Run the sheetToBigQuery function for every row and write the status.
rows.forEach((row, i) => {
let status = sheetToBigQuery(
row[SHEET_URL],
row[PROJECT_ID],
row[DATASET_ID],
row[TABLE_ID],
row[APPEND],
);
sheet.getRange(i+2, STATUS+1).setValue(status);
});
SpreadsheetApp.getActiveSpreadsheet()
.toast("Changes pushed to bigquery", "Goodbye", 2);
}
/**
* Uploads a single sheet to BigQuery.
*
* @param {string} sheetUrl - The Google Sheet Url containing the data to upload.
* @param {string} projectId - Google Cloud Project ID.
* @param {string} datasetId - BigQuery Dataset ID.
* @param {string} tableId - BigQuery Table ID.
* @param {bool} append - Appends to BigQuery table if true, otherwise replaces the content.
*
* @return {string} status - Returns the status of the job.
*/
function sheetToBigQuery(sheetUrl, projectId, datasetId, tableId, append) {
let spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
try {
createDatasetIfDoesntExist(projectId, datasetId);
} catch (e) {
return `${e}: Please verify your "Project ID" exists and you have permission to edit BigQuery`;
}
let sheet;
try {
//sheet = openSheetByUrl(sheetUrl);
sheet = spreadsheet.getSheetByName(sheetUrl);
} catch (e) {
return `${e}: Please verify the "Sheet URL" is pasted correctly`;
}
// Get the values from the sheet's data range as a matrix of values.
let rows = sheet.getDataRange().getValues();
// Normalize the headers (first row) to valid BigQuery column names.
// https://cloud.google.com/bigquery/docs/schemas#column_names
rows[0] = rows[0].map((header) => {
header = header.toLowerCase().replace(/[^\w]+/g, '_');
if (header.match(/^\d/))
header = '_' + header;
return header;
});
// Create the BigQuery load job config. For more information, see:
// https://developers.google.com/apps-script/advanced/bigquery
let loadJob = {
configuration: {
load: {
destinationTable: {
projectId: projectId,
datasetId: datasetId,
tableId: tableId
},
autodetect: true, // Infer schema from contents.
writeDisposition: append ? 'WRITE_APPEND' : 'WRITE_TRUNCATE',
}
}
};
// BigQuery load jobs can only load files, so we need to transform our
// rows (matrix of values) into a blob (file contents as string).
// For convenience, we convert the rows into a CSV data string.
// https://cloud.google.com/bigquery/docs/loading-data-local
let csvRows = rows.map(values =>
// We use JSON.stringify() to add "quotes to strings",
// but leave numbers and booleans without quotes.
// If a string itself contains quotes ("), JSON escapes them with
// a backslash as \" but the CSV format expects them to be
// escaped as "", so we replace all the \" with "".
values.map(value => JSON.stringify(value).replace(/\\"/g, '""'))
);
let csvData = csvRows.map(values => values.join(',')).join('\n');
let blob = Utilities.newBlob(csvData, 'application/octet-stream');
// Run the BigQuery load job.
try {
BigQuery.Jobs.insert(loadJob, projectId, blob);
} catch (e) {
return e;
}
Logger.log(
'Load job started. Click here to check your jobs: ' +
`https://console.cloud.google.com/bigquery?project=${projectId}&page=jobs`
);
// The status of a successful run contains the timestamp.
return `Last run: ${new Date()}`;
}
/**
* Creates a dataset if it doesn't exist, otherwise does nothing.
*
* @param {string} projectId - Google Cloud Project ID.
* @param {string} datasetId - BigQuery Dataset ID.
*/
function createDatasetIfDoesntExist(projectId, datasetId) {
try {
BigQuery.Datasets.get(projectId, datasetId);
} catch (err) {
let dataset = {
datasetReference: {
projectId: projectId,
datasetId: datasetId,
},
};
BigQuery.Datasets.insert(dataset, projectId);
Logger.log(`Created dataset: ${projectId}:${datasetId}`);
}
}
/**
* Opens the spreadsheet sheet (tab) with the given URL.
*
* @param {string} sheetUrl - Google Sheet Url.
*
* @returns {Sheet} - The sheet corresponding to the URL.
*
* @throws Throws an error if the sheet doesn't exist.
*/
function openSheetByUrl(sheetUrl) {
// Extract the sheet (tab) ID from the Url.
let sheetIdMatch = sheetUrl.match(/gid=(\d+)/);
let sheetId = sheetIdMatch ? sheetIdMatch[1] : null;
// From the open spreadsheet, get the sheet (tab) that matches the sheetId.
let spreadsheet = SpreadsheetApp.openByUrl(sheetUrl);
let sheet = spreadsheet.getSheets().filter(sheet => sheet.getSheetId() == sheetId)[0];
if (!sheet)
throw 'Sheet tab ID does not exist';
return sheet;
}