-
Notifications
You must be signed in to change notification settings - Fork 0
/
seed.ts
125 lines (98 loc) · 2.9 KB
/
seed.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import path from "node:path";
import fs from "node:fs";
import { parse } from "csv-parse";
import { dump } from "js-yaml";
import { columnsSchema, Table } from "./schema/bq.js";
import * as modelSchema from "./schema/model.js";
import { env } from "./utils/env.js";
const { DEFAULT_DATASET_ID, DEFAULT_PROJECT_ID, DATASETS_DIR_PATH, DB_CONFIG_DIR_PATH } = env;
// Debug logger for the seeding pipeline; prefixes every entry with "[bq:seed:debug]".
// `unknown[]` instead of `any[]`: console.log accepts it, and it keeps callers type-safe.
const log = (...entry: unknown[]) => console.log("[bq:seed:debug]", ...entry);
/**
 * Seed the BigQuery emulator config from the CSV datasets on disk.
 *
 * For every `<model>.csv` in DATASETS_DIR_PATH: parses the rows, validates
 * them against the matching `<modelName>Schema` exported from
 * ./schema/model.js, pairs them with the column definitions read from
 * `<model>.json`, and finally dumps the whole project/dataset/table tree as
 * YAML to `DB_CONFIG_DIR_PATH/data.yaml`.
 *
 * Exits the process with code 1 when no CSV files are found or when any
 * file fails to parse/validate.
 *
 * @param debug - When true, logs intermediate values, truncates table data
 *   to 3 rows, and exits (code 0) before writing the YAML file.
 */
export async function seedCore(debug?: boolean) {
  // Model names are derived from the CSV file names (basename without extension).
  const models = fs
    .readdirSync(DATASETS_DIR_PATH)
    .filter((f) => path.extname(f) === ".csv")
    .map((f) => path.basename(f).split(".")[0]);
  if (debug) {
    log("models", models);
  }
  if (models.length === 0) {
    console.error(`[bq] Dataset not found in ${DATASETS_DIR_PATH}`);
    process.exit(1);
  }
  const tables = await Promise.all(
    models.map(async (model) => {
      try {
        const csvPath = path.join(DATASETS_DIR_PATH, `${model}.csv`);
        const schemaJsonPath = path.join(DATASETS_DIR_PATH, `${model}.json`);
        const data = await parseCSV(csvPath);
        if (debug) {
          log("parsedCSV", data.slice(0, 3));
        }
        let modelName = model;
        // snake_case -> camelCase so it matches the exported `<name>Schema` symbol.
        if (model.includes("_")) {
          const [first, ...rest] = modelName.split("_");
          // BUG FIX: the capitalized segments must be spread into the outer
          // array before joining. Previously `[first, rest.map(...)]` nested
          // the array, and Array.prototype.join stringified it with commas
          // (e.g. "foo_bar_baz" -> "fooBar,Baz"), so multi-underscore model
          // schemas could never be resolved.
          modelName = [first, ...rest.map(([f, ...r]) => f.toUpperCase().concat(...r))].join("");
        }
        // @ts-ignore -- dynamic lookup of the zod schema by computed name.
        const modelSchemaParser = modelSchema[`${modelName}Schema`].parse;
        const parsedData = modelSchemaParser(data);
        // Column definitions live in a sibling JSON file; validate their shape too.
        const columns = columnsSchema.parse(JSON.parse(fs.readFileSync(schemaJsonPath, "utf-8")));
        const table = {
          id: model,
          columns,
          data: debug ? parsedData.slice(0, 3) : parsedData,
        };
        if (debug) {
          log("table", table);
        }
        return table;
      } catch (e) {
        console.error("[bq] CSV parse Error\n", e);
        process.exit(1);
      }
    })
  );
  const dumpResult = dumpYAML(tables);
  if (debug) {
    // Debug mode is dry-run only: inspect the YAML, never write it.
    log("dumpResult", dumpResult);
    process.exit(0);
  }
  const to = `${DB_CONFIG_DIR_PATH}/data.yaml`;
  fs.writeFileSync(to, dumpResult, "utf-8");
  console.info(`[bq] Succeed to dump data to ${to}`);
}
/**
 * Stream-parse a CSV file into an array of row objects.
 *
 * Rows are keyed by the header line (`columns: true`), scalar values are
 * auto-cast (`cast: true`), and a leading BOM is stripped (`bom: true`).
 *
 * @param srcPath - Path to the CSV file.
 * @param limit - Optional maximum number of rows to read; omit to read all.
 */
const parseCSV = async (srcPath: string, limit?: number) => {
  const stream = fs.createReadStream(srcPath);
  const parser = stream.pipe(
    parse({
      columns: true,
      cast: true,
      // cast_date: true,
      bom: true,
    })
  );
  const rows = [];
  let count = 0;
  for await (const row of parser) {
    rows.push(row);
    count += 1;
    // Stop early once the requested row count is reached (no-op when limit is undefined).
    if (count === limit) break;
  }
  return rows;
};
/**
 * Serialize the tables into the YAML config shape expected by the emulator:
 * a single default project containing a single default dataset.
 *
 * @param tables - Validated table definitions (id, columns, data).
 * @returns The YAML document as a string.
 */
const dumpYAML = (tables: Table[]) => {
  const config = {
    projects: [
      {
        id: DEFAULT_PROJECT_ID,
        datasets: [
          {
            id: DEFAULT_DATASET_ID,
            tables,
          },
        ],
      },
    ],
  };
  return dump(config);
};