forked from petdance/tidyp
/
iconvtc.c
104 lines (77 loc) · 2.55 KB
/
iconvtc.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
/* iconvtc.c -- Interface to iconv transcoding routines
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University
See tidyp.h for the copyright notice.
*/
#include "tidyp.h"
#include "forward.h"
#include "streamio.h"
#ifdef TIDY_ICONV_SUPPORT
#include <iconv.h>
/* capacity, in bytes, of the buffer that collects the input bytes of a single character */
#define TC_INBUFSIZE 16
/* capacity, in bytes, of the buffer that receives iconv's output for one converted character */
#define TC_OUTBUFSIZE 16
Bool IconvInitInputTranscoder(void)
{
return no;
}
/* Shut down the iconv input transcoder.
** This implementation has nothing to release and returns immediately.
*/
void IconvUninitInputTranscoder(void)
{
    /* nothing to do */
}
/* Read one character from an input stream that is transcoded through iconv.
**
** firstByte  - first byte of the character, already read by the caller
** in         - input stream; in->iconvptr holds the iconv conversion
**              descriptor and in->source supplies further bytes through
**              its getByte callback
** bytesRead  - receives the number of input bytes gathered by this call
**              (including firstByte); set on every return path
**
** Input bytes are accumulated one at a time and handed to iconv() until it
** reports a successful conversion.  The first four output octets are then
** combined as a little-endian 32-bit value (the descriptor is expected to
** produce UTF-32LE) and returned as the code point.
**
** Returns EndOfStream if the source runs out of bytes before a character
** is complete, or if TC_INBUFSIZE bytes have been gathered without a
** successful conversion.
*/
int IconvGetChar(byte firstByte, StreamIn * in, uint * bytesRead)
{
    iconv_t cd;
    TidyInputSource * source;
    char inbuf[TC_INBUFSIZE] = { 0 };
    char outbuf[TC_OUTBUFSIZE] = { 0 };
    size_t inbufsize = 0;

    assert( in != NULL );
    assert( bytesRead != NULL );
    assert( in->iconvptr != 0 );

    cd = (iconv_t)in->iconvptr;
    source = &in->source;

    inbuf[inbufsize++] = (char)firstByte;

    while (inbufsize < TC_INBUFSIZE)
    {
        /* iconv advances the pointers and decrements the counters it is
        ** given, so fresh copies are set up for every attempt and the
        ** whole accumulated sequence is retried from its start. */
        char * outbufptr = (char*)outbuf;
        char * inbufptr = (char*)inbuf;
        size_t readNow = inbufsize;
        size_t writeNow = TC_OUTBUFSIZE;
        size_t result = 0;
        int iconv_errno = 0;
        int nextByte = EndOfStream;

        result = iconv(cd, (const char**)&inbufptr, &readNow, (char**)&outbufptr, &writeNow);
        /* capture errno right away, before any other call can change it */
        iconv_errno = errno;

        if (result != (size_t)(-1))
        {
            unsigned int c;

            /* create codepoint from UTF-32LE octets; the arithmetic is
            ** done in unsigned int so that moving the top octet into
            ** bits 24..31 cannot overflow a signed int */
            c  = (unsigned int)(unsigned char)outbuf[0];
            c |= (unsigned int)(unsigned char)outbuf[1] << 8;
            c |= (unsigned int)(unsigned char)outbuf[2] << 16;
            c |= (unsigned int)(unsigned char)outbuf[3] << 24;

            /* set number of read bytes */
            *bytesRead = (uint)inbufsize;

            return (int)c;
        }

        assert( iconv_errno != EILSEQ ); /* broken multibyte sequence */
        assert( iconv_errno != E2BIG );  /* not enough memory */
        assert( iconv_errno == EINVAL ); /* incomplete sequence */

        /* we need more bytes */
        nextByte = source->getByte(source->sourceData);

        if (nextByte == EndOfStream)
        {
            /* todo: error message for broken stream? */
            *bytesRead = (uint)inbufsize;
            return EndOfStream;
        }

        inbuf[inbufsize++] = (char)nextByte;
    }

    /* No full character found after reading TC_INBUFSIZE bytes, */
    /* give up to read this stream, it's obviously unreadable.   */
    /* todo: error message for broken stream? */
    *bytesRead = (uint)inbufsize;
    return EndOfStream;
}
#endif /* TIDY_ICONV_SUPPORT */