Skip to content

Commit 7f4dfa4

Browse files
committed
finish
1 parent d87102c commit 7f4dfa4

File tree

2 files changed

+40
-22
lines changed

2 files changed

+40
-22
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
.ipynb_checkpoints/
22
.DS_Store
33
mimic-iv-1.0
4-
mimic*
4+
mimic*
5+
data.db

csv2sqlite-streaming.ipynb

Lines changed: 38 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -33,21 +33,22 @@
3333
"id": "5287cd41-7680-4ffc-be4e-cbf1098b7a86",
3434
"metadata": {
3535
"execution": {
36-
"iopub.execute_input": "2022-04-01T22:57:50.828795Z",
37-
"iopub.status.busy": "2022-04-01T22:57:50.828795Z",
38-
"iopub.status.idle": "2022-04-01T22:57:50.843795Z",
39-
"shell.execute_reply": "2022-04-01T22:57:50.843795Z",
40-
"shell.execute_reply.started": "2022-04-01T22:57:50.828795Z"
36+
"iopub.execute_input": "2022-04-02T21:23:32.756771Z",
37+
"iopub.status.busy": "2022-04-02T21:23:32.756771Z",
38+
"iopub.status.idle": "2022-04-02T21:23:33.178843Z",
39+
"shell.execute_reply": "2022-04-02T21:23:33.178843Z",
40+
"shell.execute_reply.started": "2022-04-02T21:23:32.756771Z"
4141
},
4242
"tags": []
4343
},
4444
"outputs": [
4545
{
46-
"ename": "SyntaxError",
47-
"evalue": "invalid syntax (<ipython-input-1-391b4b0b0e5d>, line 82)",
48-
"output_type": "error",
49-
"traceback": [
50-
"\u001b[1;36m File \u001b[1;32m\"<ipython-input-1-391b4b0b0e5d>\"\u001b[1;36m, line \u001b[1;32m82\u001b[0m\n\u001b[1;33m to_sql(file_path.stem, conn, if_exists='append', index=False)\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n"
46+
"name": "stdout",
47+
"output_type": "stream",
48+
"text": [
49+
"processing data\\grade.csv\n",
50+
"processing data\\person.csv\n",
51+
"done\n"
5152
]
5253
}
5354
],
@@ -99,7 +100,7 @@
99100
" if os.path.exists(file_path):\n",
100101
" os.remove(file_path)\n",
101102
"\n",
102-
"def create_db(input_dir, output_db, flush_size=1_000_000):\n",
103+
"def create_db(input_dir, output_db, flush_size=1_000_000, encoding='utf-8'):\n",
103104
" delete_file_if_exists(output_db)\n",
104105
" pathlib.Path(output_db).touch()\n",
105106
" \n",
@@ -115,7 +116,7 @@
115116
" \n",
116117
" cur.execute(sql)\n",
117118
" \n",
118-
" with open(file_path, 'r') as fp:\n",
119+
" with open(file_path, 'r', encoding=encoding) as fp:\n",
119120
" lines = []\n",
120121
" lines.append(headers)\n",
121122
" \n",
@@ -136,7 +137,7 @@
136137
" .to_sql(file_path.stem, conn, if_exists='append', index=False)\n",
137138
" \n",
138139
" \n",
139-
"create_db('./mimic-iv-1.0', 'mimic.db')\n",
140+
"create_db('./data', 'data.db')\n",
140141
"print('done')"
141142
]
142143
},
@@ -150,22 +151,38 @@
150151
},
151152
{
152153
"cell_type": "code",
153-
"execution_count": null,
154+
"execution_count": 2,
154155
"id": "55291a90-d156-4ae5-8e3c-ea3e516c1019",
155156
"metadata": {
156157
"execution": {
157-
"iopub.status.busy": "2022-04-01T22:57:50.844796Z",
158-
"iopub.status.idle": "2022-04-01T22:57:50.844796Z",
159-
"shell.execute_reply": "2022-04-01T22:57:50.844796Z"
158+
"iopub.execute_input": "2022-04-02T21:23:33.179846Z",
159+
"iopub.status.busy": "2022-04-02T21:23:33.179846Z",
160+
"iopub.status.idle": "2022-04-02T21:23:33.193843Z",
161+
"shell.execute_reply": "2022-04-02T21:23:33.193843Z",
162+
"shell.execute_reply.started": "2022-04-02T21:23:33.179846Z"
160163
},
161164
"tags": []
162165
},
163-
"outputs": [],
166+
"outputs": [
167+
{
168+
"name": "stdout",
169+
"output_type": "stream",
170+
"text": [
171+
"(1, 'john', 'smith', 32, 155.5, 1, 1, 1, 99.9)\n",
172+
"(1, 'john', 'smith', 32, 155.5, 1, 2, 1, 88.8)\n",
173+
"(1, 'john', 'smith', 32, 155.5, 1, 3, 1, 92.2)\n",
174+
"(2, 'jane', 'clinton', 28, 133.3, 0, 4, 2, 88.9)\n",
175+
"(2, 'jane', 'clinton', 28, 133.3, 0, 5, 2, 92.2)\n",
176+
"(2, 'jane', 'clinton', 28, 133.3, 0, 6, 2, 84.4)\n"
177+
]
178+
}
179+
],
164180
"source": [
165-
"with sqlite3.connect('mimic.db') as conn:\n",
181+
"with sqlite3.connect('data.db') as conn:\n",
166182
" sql = '''\n",
167-
" SELECT count(*) as total\n",
168-
" FROM admissions\n",
183+
" SELECT *\n",
184+
" FROM person p\n",
185+
" JOIN grade g ON p.id = g.person_id\n",
169186
" '''\n",
170187
" \n",
171188
" cur = conn.cursor()\n",

0 commit comments

Comments
 (0)