Skip to content

Commit e182201

Browse files
committed
Added Python codec name to the connection.
This allows dropping repeated dictionary lookups with unicode query/parameters.
1 parent ae06fb0 commit e182201

File tree

9 files changed

+140
-95
lines changed

9 files changed

+140
-95
lines changed

ChangeLog

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
2010-12-18 Daniele Varrazzo <daniele.varrazzo@gmail.com>
2+
3+
* connection.h: added codec attribute to avoid repeated codec name
4+
lookups during unicode query/params manipulations.
5+
16
2010-12-15 Daniele Varrazzo <daniele.varrazzo@gmail.com>
27

38
* psycopg/utils.c: Added psycopg_strdup function.

NEWS-2.3

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ What's new in psycopg 2.3.2
44
- Improved PostgreSQL-Python encodings mapping. Added a few
55
missing encodings: EUC_CN, EUC_JIS_2004, ISO885910, ISO885916,
66
LATIN10, SHIFT_JIS_2004.
7+
- Dropped repeated dictionary lookups with unicode query/parameters.
78

89

910
What's new in psycopg 2.3.1

psycopg/adapter_qstring.c

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -49,22 +49,9 @@ qstring_quote(qstringObject *self)
4949
Dprintf("qstring_quote: encoding to %s", self->encoding);
5050

5151
if (PyUnicode_Check(self->wrapped) && self->encoding) {
52-
PyObject *enc = PyDict_GetItemString(psycoEncodings, self->encoding);
53-
/* note that enc is a borrowed reference */
54-
55-
if (enc) {
56-
const char *s = PyString_AsString(enc);
57-
Dprintf("qstring_quote: encoding unicode object to %s", s);
58-
str = PyUnicode_AsEncodedString(self->wrapped, s, NULL);
59-
Dprintf("qstring_quote: got encoded object at %p", str);
60-
if (str == NULL) return NULL;
61-
}
62-
else {
63-
/* can't find the right encoder, raise exception */
64-
PyErr_Format(InterfaceError,
65-
"can't encode unicode string to %s", self->encoding);
66-
return NULL;
67-
}
52+
str = PyUnicode_AsEncodedString(self->wrapped, self->encoding, NULL);
53+
Dprintf("qstring_quote: got encoded object at %p", str);
54+
if (str == NULL) return NULL;
6855
}
6956

7057
/* if the wrapped object is a simple string, we don't know how to
@@ -144,8 +131,8 @@ qstring_prepare(qstringObject *self, PyObject *args)
144131
we don't need the encoding if that's not the case */
145132
if (PyUnicode_Check(self->wrapped)) {
146133
if (self->encoding) free(self->encoding);
147-
self->encoding = strdup(conn->encoding);
148-
Dprintf("qstring_prepare: set encoding to %s", conn->encoding);
134+
self->encoding = strdup(conn->codec);
135+
Dprintf("qstring_prepare: set encoding to %s", conn->codec);
149136
}
150137

151138
Py_CLEAR(self->conn);

psycopg/adapter_qstring.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ typedef struct {
3737

3838
PyObject *wrapped;
3939
PyObject *buffer;
40+
/* NOTE: this used to be a PostgreSQL encoding: changed in 2.3.2 to be a
41+
* Python codec name. I don't expect there has been any user for this
42+
* object other than adapting str/unicode, so I don't expect client code
43+
* broken for this reason. */
4044
char *encoding;
4145

4246
PyObject *conn;

psycopg/connection.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ typedef struct {
8383
char *dsn; /* data source name */
8484
char *critical; /* critical error on this connection */
8585
char *encoding; /* current backend encoding */
86+
char *codec; /* python codec name for encoding */
8687

8788
long int closed; /* 1 means connection has been closed;
8889
2 that something horrible happened */

psycopg/connection_int.c

Lines changed: 94 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -212,38 +212,89 @@ conn_get_standard_conforming_strings(PGconn *pgconn)
212212
return equote;
213213
}
214214

215-
/* Return a string containing the client_encoding setting.
215+
/* Convert a PostgreSQL encoding to a Python codec.
216216
*
217-
* Return a new string allocated by malloc(): use free() to free it.
218-
* Return NULL in case of failure.
217+
* Return a new copy of the codec name allocated on the Python heap,
218+
* NULL with exception in case of error.
219219
*/
220220
static char *
221-
conn_get_encoding(PGconn *pgconn)
221+
conn_encoding_to_codec(const char *enc)
222222
{
223-
const char *tmp, *i;
224-
char *encoding, *j;
223+
char *tmp;
224+
Py_ssize_t size;
225+
PyObject *pyenc;
226+
char *rv = NULL;
227+
228+
if (!(pyenc = PyDict_GetItemString(psycoEncodings, enc))) {
229+
PyErr_Format(OperationalError,
230+
"no Python codec for client encoding '%s'", enc);
231+
goto exit;
232+
}
233+
if (-1 == PyString_AsStringAndSize(pyenc, &tmp, &size)) {
234+
goto exit;
235+
}
236+
237+
/* have our own copy of the python codec name */
238+
rv = psycopg_strdup(tmp, size);
239+
240+
exit:
241+
/* pyenc is borrowed: no decref. */
242+
return rv;
243+
}
244+
245+
/* Read the client encoding from the connection.
246+
*
247+
* Store the encoding in the self->encoding field and the name of the
248+
* matching python codec in codec. The buffers are allocated on the Python
249+
* heap.
250+
*
251+
* Return 0 on success, else nonzero.
252+
*/
253+
static int
254+
conn_read_encoding(connectionObject *self, PGconn *pgconn)
255+
{
256+
char *enc = NULL, *codec = NULL, *j;
257+
const char *tmp;
258+
int rv = -1;
225259

226260
tmp = PQparameterStatus(pgconn, "client_encoding");
227261
Dprintf("conn_connect: client encoding: %s", tmp ? tmp : "(none)");
228262
if (!tmp) {
229263
PyErr_SetString(OperationalError,
230264
"server didn't return client encoding");
231-
return NULL;
265+
goto exit;
232266
}
233267

234-
encoding = malloc(strlen(tmp)+1);
235-
if (encoding == NULL) {
268+
if (!(enc = PyMem_Malloc(strlen(tmp)+1))) {
236269
PyErr_NoMemory();
237-
return NULL;
270+
goto exit;
238271
}
239272

240-
/* return in uppercase */
241-
i = tmp;
242-
j = encoding;
243-
while (*i) { *j++ = toupper(*i++); }
273+
/* turn encoding into uppercase */
274+
j = enc;
275+
while (*tmp) { *j++ = toupper(*tmp++); }
244276
*j = '\0';
245277

246-
return encoding;
278+
/* Look for this encoding in Python codecs. */
279+
if (!(codec = conn_encoding_to_codec(enc))) {
280+
goto exit;
281+
}
282+
283+
/* Good, success: store the encoding/codec in the connection. */
284+
PyMem_Free(self->encoding);
285+
self->encoding = enc;
286+
enc = NULL;
287+
288+
PyMem_Free(self->codec);
289+
self->codec = codec;
290+
codec = NULL;
291+
292+
rv = 0;
293+
294+
exit:
295+
PyMem_Free(enc);
296+
PyMem_Free(codec);
297+
return rv;
247298
}
248299

249300
int
@@ -319,9 +370,8 @@ conn_setup(connectionObject *self, PGconn *pgconn)
319370
PyErr_SetString(InterfaceError, "only protocol 3 supported");
320371
return -1;
321372
}
322-
/* conn_get_encoding returns a malloc'd string */
323-
self->encoding = conn_get_encoding(pgconn);
324-
if (self->encoding == NULL) {
373+
374+
if (conn_read_encoding(self, pgconn)) {
325375
return -1;
326376
}
327377

@@ -651,9 +701,7 @@ _conn_poll_setup_async(connectionObject *self)
651701
PyErr_SetString(InterfaceError, "only protocol 3 supported");
652702
break;
653703
}
654-
/* conn_get_encoding returns a malloc'd string */
655-
self->encoding = conn_get_encoding(self->pgconn);
656-
if (self->encoding == NULL) {
704+
if (conn_read_encoding(self, self->pgconn)) {
657705
break;
658706
}
659707
self->cancel = conn_get_cancel(self->pgconn);
@@ -873,11 +921,15 @@ conn_set_client_encoding(connectionObject *self, const char *enc)
873921
char *error = NULL;
874922
char query[48];
875923
int res = 0;
924+
char *codec;
876925

877926
/* If the current encoding is equal to the requested one we don't
878927
issue any query to the backend */
879928
if (strcmp(self->encoding, enc) == 0) return 0;
880929

930+
/* We must know what python codec this encoding is. */
931+
if (!(codec = conn_encoding_to_codec(enc))) { return -1; }
932+
881933
Py_BEGIN_ALLOW_THREADS;
882934
pthread_mutex_lock(&self->lock);
883935

@@ -886,19 +938,29 @@ conn_set_client_encoding(connectionObject *self, const char *enc)
886938

887939
/* abort the current transaction, to set the encoding outside of
888940
transactions */
889-
res = pq_abort_locked(self, &pgres, &error, &_save);
890-
891-
if (res == 0) {
892-
res = pq_execute_command_locked(self, query, &pgres, &error, &_save);
893-
if (res == 0) {
894-
/* no error, we can proceeed and store the new encoding */
895-
if (self->encoding) free(self->encoding);
896-
self->encoding = strdup(enc);
897-
Dprintf("conn_set_client_encoding: set encoding to %s",
898-
self->encoding);
899-
}
941+
if ((res = pq_abort_locked(self, &pgres, &error, &_save))) {
942+
goto endlock;
943+
}
944+
945+
if ((res = pq_execute_command_locked(self, query, &pgres, &error, &_save))) {
946+
goto endlock;
900947
}
901948

949+
/* no error, we can proceed and store the new encoding */
950+
PyMem_Free(self->encoding);
951+
if (!(self->encoding = psycopg_strdup(enc, 0))) {
952+
res = 1; /* don't call pq_complete_error below */
953+
goto endlock;
954+
}
955+
956+
/* Store the python codec too. */
957+
PyMem_Free(self->codec);
958+
self->codec = codec;
959+
960+
Dprintf("conn_set_client_encoding: set encoding to %s (codec: %s)",
961+
self->encoding, self->codec);
962+
963+
endlock:
902964

903965
pthread_mutex_unlock(&self->lock);
904966
Py_END_ALLOW_THREADS;

psycopg/connection_type.c

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -425,35 +425,38 @@ psyco_conn_set_isolation_level(connectionObject *self, PyObject *args)
425425
static PyObject *
426426
psyco_conn_set_client_encoding(connectionObject *self, PyObject *args)
427427
{
428-
const char *enc = NULL;
429-
char *buffer;
430-
size_t i, j;
428+
const char *enc;
429+
char *buffer, *dest;
430+
PyObject *rv = NULL;
431+
Py_ssize_t len;
431432

432433
EXC_IF_CONN_CLOSED(self);
433434
EXC_IF_CONN_ASYNC(self, set_client_encoding);
434435
EXC_IF_TPC_PREPARED(self, set_client_encoding);
435436

436-
if (!PyArg_ParseTuple(args, "s", &enc)) return NULL;
437+
if (!PyArg_ParseTuple(args, "s#", &enc, &len)) return NULL;
437438

438439
/* convert to upper case and remove '-' and '_' from string */
439-
buffer = PyMem_Malloc(strlen(enc)+1);
440-
for (i=j=0 ; i < strlen(enc) ; i++) {
441-
if (enc[i] == '_' || enc[i] == '-')
442-
continue;
443-
else
444-
buffer[j++] = toupper(enc[i]);
440+
if (!(dest = buffer = PyMem_Malloc(len+1))) {
441+
return PyErr_NoMemory();
442+
}
443+
444+
while (*enc) {
445+
if (*enc == '_' || *enc == '-') {
446+
++enc;
447+
}
448+
else {
449+
*dest++ = toupper(*enc++);
450+
}
445451
}
446-
buffer[j] = '\0';
452+
*dest = '\0';
447453

448454
if (conn_set_client_encoding(self, buffer) == 0) {
449-
PyMem_Free(buffer);
450455
Py_INCREF(Py_None);
451-
return Py_None;
452-
}
453-
else {
454-
PyMem_Free(buffer);
455-
return NULL;
456+
rv = Py_None;
456457
}
458+
PyMem_Free(buffer);
459+
return rv;
457460
}
458461

459462
/* get_transaction_status method - Get backend transaction status */
@@ -892,7 +895,8 @@ connection_dealloc(PyObject* obj)
892895
conn_notice_clean(self);
893896

894897
if (self->dsn) free(self->dsn);
895-
if (self->encoding) free(self->encoding);
898+
PyMem_Free(self->encoding);
899+
PyMem_Free(self->codec);
896900
if (self->critical) free(self->critical);
897901

898902
Py_CLEAR(self->async_cursor);

psycopg/cursor_type.c

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -272,21 +272,11 @@ static PyObject *_psyco_curs_validate_sql_basic(
272272
Py_INCREF(sql);
273273
}
274274
else if (PyUnicode_Check(sql)) {
275-
PyObject *enc = PyDict_GetItemString(psycoEncodings,
276-
self->conn->encoding);
277-
/* enc is a borrowed reference; we won't decref it */
278-
279-
if (enc) {
280-
sql = PyUnicode_AsEncodedString(sql, PyString_AsString(enc), NULL);
281-
/* if there was an error during the encoding from unicode to the
282-
target encoding, we just let the exception propagate */
283-
if (sql == NULL) { goto fail; }
284-
} else {
285-
PyErr_Format(InterfaceError,
286-
"can't encode unicode SQL statement to %s",
287-
self->conn->encoding);
288-
goto fail;
289-
}
275+
char *enc = self->conn->codec;
276+
sql = PyUnicode_AsEncodedString(sql, enc, NULL);
277+
/* if there was an error during the encoding from unicode to the
278+
target encoding, we just let the exception propagate */
279+
if (sql == NULL) { goto fail; }
290280
}
291281
else {
292282
/* sql is not unicode or string, raise an error */

psycopg/typecast_basic.c

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -82,21 +82,12 @@ typecast_STRING_cast(const char *s, Py_ssize_t len, PyObject *curs)
8282
static PyObject *
8383
typecast_UNICODE_cast(const char *s, Py_ssize_t len, PyObject *curs)
8484
{
85-
PyObject *enc;
85+
char *enc;
8686

8787
if (s == NULL) {Py_INCREF(Py_None); return Py_None;}
8888

89-
enc = PyDict_GetItemString(psycoEncodings,
90-
((cursorObject*)curs)->conn->encoding);
91-
if (enc) {
92-
return PyUnicode_Decode(s, len, PyString_AsString(enc), NULL);
93-
}
94-
else {
95-
PyErr_Format(InterfaceError,
96-
"can't decode into unicode string from %s",
97-
((cursorObject*)curs)->conn->encoding);
98-
return NULL;
99-
}
89+
enc = ((cursorObject*)curs)->conn->codec;
90+
return PyUnicode_Decode(s, len, enc, NULL);
10091
}
10192

10293
/** BOOLEAN - cast boolean value into right python object **/

0 commit comments

Comments
 (0)