-
Notifications
You must be signed in to change notification settings - Fork 1
/
csvformat.c
242 lines (213 loc) · 6.51 KB
/
csvformat.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
/*
* CSV Selector
*
* by William R. Fraser, 10/22/2011
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <unistd.h>
#include <string.h>
#include "growbuf.h"
#include "csvformat.h"
// Debug tracing switch. DEBUG is written as a prefix in front of a statement;
// with the definition below it expands to `if (false)`, so the prefixed
// statement is still compiled (and type-checked) but never executes.
// NOTE(review): the commented-out empty definition is presumably the
// alternative that turns tracing on; it would have to replace the line below,
// not accompany it -- confirm.
//#define DEBUG
#define DEBUG if (false)
/**
 * Multi-character variant of strchr(): locate the first character of a string
 * that matches any member of a candidate set.
 *
 * Arguments:
 *  haystack    - NUL-terminated string to scan
 *  chars       - array holding the candidate characters
 *  nchars      - how many entries of chars to consider
 *
 * Return Value:
 *  Pointer to the earliest character in haystack that equals one of the
 *  candidates, or NULL when no character matches. The terminating NUL of
 *  haystack is never examined, so it is never reported as a match.
 */
const char* strchrs(const char* haystack, const char* chars, size_t nchars)
{
    for (const char* cursor = haystack; *cursor != '\0'; cursor++) {
        // The order in which candidates are tried does not matter: any hit
        // reports the same haystack position.
        size_t remaining = nchars;
        while (remaining-- > 0) {
            if (chars[remaining] == *cursor) {
                return cursor;
            }
        }
    }

    return NULL;
}
/**
 * Print a CSV field, adding double-quotes only when they are required.
 *
 * The field is written verbatim unless it contains a comma, a double-quote, a
 * newline or a carriage return. In that case it is wrapped in double-quotes
 * and every embedded double-quote is doubled, so the reader in this file
 * parses it back to the original text. (A field that merely starts with a
 * double-quote would otherwise be mistaken for a quoted field on input.)
 *
 * Arguments:
 *  field   - NUL-terminated field text to print
 *  output  - file pointer to print to
 */
void print_csv_field(const char* field, FILE* output)
{
    // Fast path: nothing in the field needs protecting.
    if (NULL == strpbrk(field, ",\"\n\r")) {
        fputs(field, output);
        return;
    }

    fputc('"', output);
    for (const char* p = field; *p != '\0'; p++) {
        if (*p == '"') {
            // Escape an embedded quote by emitting it twice.
            fputc('"', output);
        }
        fputc(*p, output);
    }
    fputc('"', output);
}
/**
 * Core CSV parser shared by read_csv() and read_csv_row().
 *
 * Reads input one character at a time, splitting it into fields on commas and
 * into rows on newlines. A field that starts with a double-quote is a quoted
 * field: commas and newlines inside it are data, and two consecutive
 * double-quotes stand for one literal double-quote. Every completed field is
 * NUL-terminated before the row is handed to row_evaluator.
 *
 * Arguments:
 *  input            - stream to read from, starting at its current position
 *  row_evaluator    - called once per row with: the growbuf holding the row's
 *                     field growbufs, the row number, the byte offset
 *                     (relative to where reading started) of the row's first
 *                     character, and context
 *  context          - passed through to row_evaluator untouched
 *  one_row_only     - if true, stop after the first newline-terminated row
 *  start_row_number - row number reported for the first row
 *
 * Return Value:
 *  0 on success, 1 if a quoted field is followed by anything other than a
 *  comma, a newline or end of input (a message is printed to stderr).
 */
static int read_csv_internal(
        FILE* input,
        row_evaluator row_evaluator,
        void* context,
        bool one_row_only,
        int start_row_number)
{
    int retval = 0;
    growbuf* fields = NULL;     // array of growbuf*, one per field in the row
    growbuf* field = NULL;      // field currently being filled (last in fields)
    size_t rownum = start_row_number;
    bool in_dquot = false;      // inside a double-quoted field
    // The previous char was a quote inside a quoted field: it is either the
    // closing quote or the first half of an escaped "" pair.
    bool prev_was_dquot = false;
    uint64_t byte_offset = 0;       // offset of the char being processed
    uint64_t row_byte_offset = 0;   // offset of the current row's first char

    fields = growbuf_create(1);
    field = growbuf_create(32);
    growbuf_append(fields, &field, sizeof(growbuf*));

    while (true) { // iterate over characters
        int char_in = fgetc(input);
        if (EOF == char_in) {
            goto handle_eof;
        }

        char c = (char)char_in;

        switch (c) {
        case '"':
            if (in_dquot) {
                if (prev_was_dquot) {
                    // second quote of a "" pair: emit one literal quote
                    growbuf_append(field, &c, 1);
                    prev_was_dquot = false;
                }
                else {
                    prev_was_dquot = true;
                    // don't append yet, wait for the next char.
                }
            }
            else if (field->size != 0) {
                // quote in the middle of an unquoted field is literal data
                growbuf_append(field, &c, 1);
            }
            else {
                in_dquot = true;
                // don't append.
            }
            break;

        case '\n':
            if (!in_dquot || prev_was_dquot) {
                // we're done with the line
                c = '\0';
                growbuf_append(field, &c, 1);

                DEBUG for (size_t i = 0; i < fields->size / sizeof(void*); i++)
                {
                    fprintf(stderr, "field %zu: ", i);
                    fprintf(stderr, "\"%s\"\n",
                        (char*)(((growbuf**)fields->buf)[i]->buf)
                    );
                }

                // NOTE(review): assuming growbuf_append grows ->size, the
                // terminator appended above makes field 0 non-empty, so this
                // check looks always-true here -- confirm whether blank lines
                // were meant to be skipped.
                if (fields->size / sizeof(void*) > 0
                        && (fields->size / sizeof(void*) > 1
                            || ((growbuf**)fields->buf)[0]->size > 0))
                {
                    row_evaluator(fields, rownum, row_byte_offset, context);
                }

                for (size_t i = 0; i < fields->size / sizeof(void*); i++) {
                    growbuf_free(((growbuf**)(fields->buf))[i]);
                }

                // Mark the row as empty so cleanup does not free the fields
                // a second time.
                fields->size = 0;

                if (one_row_only) {
                    goto cleanup;
                }

                field = growbuf_create(32);
                growbuf_append(fields, &field, sizeof(void*));

                in_dquot = false;
                prev_was_dquot = false;
                rownum++;
                // byte_offset is only advanced after the switch, so the next
                // row begins one past the newline being processed.
                row_byte_offset = byte_offset + 1;
                break;
            }
            else {
                // embedded newline
                growbuf_append(field, &c, 1);
            }
            break;

        case ',':
            if (!in_dquot || prev_was_dquot) {
                // we're done with the field
                c = '\0';
                growbuf_append(field, &c, 1);

                field = growbuf_create(32);
                growbuf_append(fields, &field, sizeof(void*));

                in_dquot = false;
                prev_was_dquot = false;
            }
            else {
                growbuf_append(field, &c, 1);
            }
            break;

        default:
            if (in_dquot && prev_was_dquot) {
                // ordinary character right after the closing quote
                fprintf(stderr, "csv format error: double-quoted field has "
                                "trailing garbage. Line %zu, field %zu\n",
                                rownum,
                                fields->size / sizeof(void*));
                retval = 1;
                goto cleanup;
            }
            else {
                growbuf_append(field, &c, 1);
            }
            break;
        } // switch

        byte_offset += 1;
    } // while (true)

handle_eof:
    // Input ended without a trailing newline: report the pending row, if it
    // holds anything. The emptiness check must run before the terminator is
    // appended, otherwise a fresh empty row would look non-empty.
    if (fields->size / sizeof(void*) > 0
            && ((fields->size / sizeof(void*) > 1
                || ((growbuf**)fields->buf)[0]->size > 0)))
    {
        // Terminate the last field, as the comma and newline paths do, so the
        // evaluator can treat every field as a C string.
        char terminator = '\0';
        growbuf_append(field, &terminator, 1);

        // Report the offset of the row's first byte, consistent with rows
        // that end in a newline (not the end-of-input offset).
        row_evaluator(fields, rownum, row_byte_offset, context);
    }

cleanup:
    if (NULL != fields) {
        for (size_t i = 0; i < fields->size / sizeof(void*); i++) {
            growbuf_free(((growbuf**)(fields->buf))[i]);
        }
        growbuf_free(fields);
    }

    return retval;
}
/**
 * Parse CSV data from a stream and invoke a callback for every row.
 *
 * Rows are numbered starting from 0 and parsing continues until end of input.
 *
 * Arguments:
 *  input           - file pointer to the CSV data to read
 *  row_evaluator   - function called once per row with 4 arguments:
 *                      - growbuf whose contents are the row's field growbufs
 *                      - the row number
 *                      - a byte offset for the row, counted from where
 *                        reading started
 *                      - the context parameter passed to this function
 *                    Its result, if any, is not used.
 *  context         - arbitrary data to pass to the row evaluator
 *
 * Return Value:
 *  Whatever read_csv_internal() returns: 0 on success, 1 on a CSV format
 *  error.
 */
int read_csv(FILE* input, row_evaluator row_evaluator, void* context)
{
    const bool stop_after_first_row = false;
    const int first_row_number = 0;

    int status = read_csv_internal(input, row_evaluator, context,
                                   stop_after_first_row, first_row_number);
    return status;
}
/**
 * Parse a single CSV row from a stream and invoke a callback for it.
 *
 * Reading starts at the stream's current position and stops after the first
 * complete row (or at end of input).
 *
 * Arguments:
 *  input           - file pointer positioned where the row begins
 *  row_number      - row number to report to the row evaluator
 *  row_evaluator   - function to call with the parsed row
 *  context         - arbitrary data to pass to the row evaluator
 *
 * Return Value:
 *  Whatever read_csv_internal() returns: 0 on success, 1 on a CSV format
 *  error.
 */
int read_csv_row(FILE* input, int row_number, row_evaluator row_evaluator, void* context)
{
    const bool stop_after_first_row = true;

    int status = read_csv_internal(input, row_evaluator, context,
                                   stop_after_first_row, row_number);
    return status;
}