Skip to content

Commit cb3ede7

Browse files
committed
set low memory to false; increase flush size
1 parent 09cafeb commit cb3ede7

File tree

1 file changed

+15
-7
lines changed

1 file changed

+15
-7
lines changed

csv2sqlite-streaming.ipynb

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@
3333
"id": "5287cd41-7680-4ffc-be4e-cbf1098b7a86",
3434
"metadata": {
3535
"execution": {
36-
"iopub.execute_input": "2022-04-01T22:30:15.776608Z",
37-
"iopub.status.busy": "2022-04-01T22:30:15.776608Z"
36+
"iopub.execute_input": "2022-04-01T22:54:59.484432Z",
37+
"iopub.status.busy": "2022-04-01T22:54:59.483441Z"
3838
},
3939
"tags": []
4040
},
@@ -46,6 +46,14 @@
4646
"processing mimic-iv-1.0\\admissions.csv\n",
4747
"processing mimic-iv-1.0\\chartevents.csv\n"
4848
]
49+
},
50+
{
51+
"name": "stderr",
52+
"output_type": "stream",
53+
"text": [
54+
"C:\\Users\\jeev\\Anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3357: DtypeWarning: Columns (6,8) have mixed types.Specify dtype option on import or set low_memory=False.\n",
55+
" if (await self.run_code(code, result, async_=asy)):\n"
56+
]
4957
}
5058
],
5159
"source": [
@@ -96,7 +104,7 @@
96104
" if os.path.exists(file_path):\n",
97105
" os.remove(file_path)\n",
98106
"\n",
99-
"def create_db(input_dir, output_db, flush_size=1_000):\n",
107+
"def create_db(input_dir, output_db, flush_size=1_000_000):\n",
100108
" delete_file_if_exists(output_db)\n",
101109
" pathlib.Path(output_db).touch()\n",
102110
" \n",
@@ -123,14 +131,14 @@
123131
" lines.append(line)\n",
124132
" \n",
125133
" if len(lines) == flush_size:\n",
126-
" df = pd.read_csv(StringIO(''.join(lines)))\n",
127-
" df.to_sql(file_path.stem, conn, if_exists='append', index=False)\n",
134+
" pd.read_csv(StringIO(''.join(lines)), low_memory=False) \\\n",
135+
" .to_sql(file_path.stem, conn, if_exists='append', index=False)\n",
128136
" \n",
129137
" lines = []\n",
130138
" lines.append(headers)\n",
131139
" \n",
132-
" df = pd.read_csv(StringIO(''.join(lines)))\n",
133-
" df.to_sql(file_path.stem, conn, if_exists='append', index=False)\n",
140+
" pd.read_csv(StringIO(''.join(lines)), low_memory=False) \\\n",
141+
" .to_sql(file_path.stem, conn, if_exists='append', index=False)\n",
134142
" \n",
135143
" \n",
136144
"create_db('./mimic-iv-1.0', 'mimic.db')\n",

0 commit comments

Comments
 (0)