microsoft · jykanase · May 14, 2025
@@ -0,0 +1,237 @@
+From 7a9246885bb50bd483ef78a3d1334e0d73502b1c Mon Sep 17 00:00:00 2001
+From: jykanase <v-jykanase@microsoft.com>
+Date: Wed, 14 May 2025 07:24:34 +0000
+Subject: [PATCH] CVE-2025-4207
+Upstream Patch Reference: https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=ec5f89e8a29f32c7dbc4dd8734ed8406d771de2f
+---
+ src/backend/utils/mb/mbutils.c | 18 ++++++++----
+ src/common/jsonapi.c           |  7 +++--
+ src/common/wchar.c             | 51 ++++++++++++++++++++++++++++++----
+ src/include/mb/pg_wchar.h      |  2 ++
+ src/interfaces/libpq/fe-exec.c |  5 ++--
+ src/interfaces/libpq/fe-misc.c | 15 ++++------
+ 6 files changed, 74 insertions(+), 24 deletions(-)
+
+diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
+index 67a1ab2..1ccdf9d 100644
+--- a/src/backend/utils/mb/mbutils.c
++++ b/src/backend/utils/mb/mbutils.c
+@@ -1088,7 +1088,7 @@ pg_mbcliplen(const char *mbstr, int len, int limit)
+ }
+
+ /*
+- * pg_mbcliplen with specified encoding
++ * pg_mbcliplen with specified encoding; string must be valid in encoding
+  */
+ int
+ pg_encoding_mbcliplen(int encoding, const char *mbstr,
+@@ -1699,12 +1699,12 @@ check_encoding_conversion_args(int src_encoding,
+  * report_invalid_encoding: complain about invalid multibyte character
+  *
+  * note: len is remaining length of string, not length of character;
+- * len must be greater than zero, as we always examine the first byte.
++ * len must be greater than zero (or we'd neglect initializing "buf").
+  */
+ void
+ report_invalid_encoding(int encoding, const char *mbstr, int len)
+ {
+-	int			l = pg_encoding_mblen(encoding, mbstr);
++	int			l = pg_encoding_mblen_or_incomplete(encoding, mbstr, len);
+ 	char		buf[8 * 5 + 1];
+ 	char	   *p = buf;
+ 	int			j,
+@@ -1731,18 +1731,26 @@ report_invalid_encoding(int encoding, const char *mbstr, int len)
+  * report_untranslatable_char: complain about untranslatable character
+  *
+  * note: len is remaining length of string, not length of character;
+- * len must be greater than zero, as we always examine the first byte.
++ * len must be greater than zero (or we'd neglect initializing "buf").
+  */
+ void
+ report_untranslatable_char(int src_encoding, int dest_encoding,
+ 						   const char *mbstr, int len)
+ {
+-	int			l = pg_encoding_mblen(src_encoding, mbstr);
++	int			l;
+ 	char		buf[8 * 5 + 1];
+ 	char	   *p = buf;
+ 	int			j,
+ 				jlimit;
+
++	/*
++	 * We probably could use plain pg_encoding_mblen(), because
++	 * gb18030_to_utf8() verifies before it converts.  All conversions should.
++	 * For src_encoding!=GB18030, len>0 meets pg_encoding_mblen() needs.  Even
++	 * so, be defensive, since a buggy conversion might pass invalid data.
++	 * This is not a performance-critical path.
++	 */
++	l = pg_encoding_mblen_or_incomplete(src_encoding, mbstr, len);
+ 	jlimit = Min(l, len);
+ 	jlimit = Min(jlimit, 8);	/* prevent buffer overrun */
+
+diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
+index 07f6928..0fa5361 100644
+--- a/src/common/jsonapi.c
++++ b/src/common/jsonapi.c
+@@ -721,8 +721,11 @@ json_lex_string(JsonLexContext *lex)
+ 	} while (0)
+ #define FAIL_AT_CHAR_END(code) \
+ 	do { \
+-		char	   *term = s + pg_encoding_mblen(lex->input_encoding, s); \
+-		lex->token_terminator = (term <= end) ? term : end; \
++		ptrdiff_t	remaining = end - s; \
++		int			charlen; \
++		charlen = pg_encoding_mblen_or_incomplete(lex->input_encoding, \
++												  s, remaining); \
++		lex->token_terminator = (charlen <= remaining) ? s + charlen : end; \
+ 		return code; \
+ 	} while (0)
+
+diff --git a/src/common/wchar.c b/src/common/wchar.c
+index dcb03d0..82ea3a4 100644
+--- a/src/common/wchar.c
++++ b/src/common/wchar.c
+@@ -12,6 +12,8 @@
+  */
+ #include "c.h"
+
++#include <limits.h>
++
+ #include "mb/pg_wchar.h"
+ #include "utils/ascii.h"
+
+@@ -2169,10 +2171,27 @@ const pg_wchar_tbl pg_wchar_table[] = {
+ /*
+  * Returns the byte length of a multibyte character.
+  *
+- * Caution: when dealing with text that is not certainly valid in the
+- * specified encoding, the result may exceed the actual remaining
+- * string length.  Callers that are not prepared to deal with that
+- * should use pg_encoding_mblen_bounded() instead.
++ * Choose "mblen" functions based on the input string characteristics.
++ * pg_encoding_mblen() can be used when ANY of these conditions are met:
++ *
++ * - The input string is zero-terminated
++ *
++ * - The input string is known to be valid in the encoding (e.g., string
++ *   converted from database encoding)
++ *
++ * - The encoding is not GB18030 (e.g., when only database encodings are
++ *   passed to 'encoding' parameter)
++ *
++ * encoding==GB18030 requires examining up to two bytes to determine character
++ * length.  Therefore, callers satisfying none of those conditions must use
++ * pg_encoding_mblen_or_incomplete() instead, as access to mbstr[1] cannot be
++ * guaranteed to be within allocation bounds.
++ *
++ * When dealing with text that is not certainly valid in the specified
++ * encoding, the result may exceed the actual remaining string length.
++ * Callers that are not prepared to deal with that should use Min(remaining,
++ * pg_encoding_mblen_or_incomplete()).  For zero-terminated strings, that and
++ * pg_encoding_mblen_bounded() are interchangeable.
+  */
+ int
+ pg_encoding_mblen(int encoding, const char *mbstr)
+@@ -2183,8 +2202,28 @@ pg_encoding_mblen(int encoding, const char *mbstr)
+ }
+
+ /*
+- * Returns the byte length of a multibyte character; but not more than
+- * the distance to end of string.
++ * Returns the byte length of a multibyte character (possibly not
++ * zero-terminated), or INT_MAX if too few bytes remain to determine a length.
++ */
++int
++pg_encoding_mblen_or_incomplete(int encoding, const char *mbstr,
++								size_t remaining)
++{
++	/*
++	 * Define zero remaining as too few, even for single-byte encodings.
++	 * pg_gb18030_mblen() reads one or two bytes; single-byte encodings read
++	 * zero; others read one.
++	 */
++	if (remaining < 1 ||
++		(encoding == PG_GB18030 && IS_HIGHBIT_SET(*mbstr) && remaining < 2))
++		return INT_MAX;
++	return pg_encoding_mblen(encoding, mbstr);
++}
++
++/*
++ * Returns the byte length of a multibyte character; but not more than the
++ * distance to the terminating zero byte.  For input that might lack a
++ * terminating zero, use Min(remaining, pg_encoding_mblen_or_incomplete()).
+  */
+ int
+ pg_encoding_mblen_bounded(int encoding, const char *mbstr)
+diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
+index 396bbde..96d138d 100644
+--- a/src/include/mb/pg_wchar.h
++++ b/src/include/mb/pg_wchar.h
+@@ -575,6 +575,8 @@ extern int	pg_valid_server_encoding_id(int encoding);
+  */
+ extern void pg_encoding_set_invalid(int encoding, char *dst);
+ extern int	pg_encoding_mblen(int encoding, const char *mbstr);
++extern int	pg_encoding_mblen_or_incomplete(int encoding, const char *mbstr,
++											size_t remaining);
+ extern int	pg_encoding_mblen_bounded(int encoding, const char *mbstr);
+ extern int	pg_encoding_dsplen(int encoding, const char *mbstr);
+ extern int	pg_encoding_verifymbchar(int encoding, const char *mbstr, int len);
+diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c
+index ed249fb..17ef04c 100644
+--- a/src/interfaces/libpq/fe-exec.c
++++ b/src/interfaces/libpq/fe-exec.c
+@@ -3966,7 +3966,7 @@ PQescapeStringInternal(PGconn *conn,
+ 		}
+
+ 		/* Slow path for possible multibyte characters */
+-		charlen = pg_encoding_mblen(encoding, source);
++		charlen = pg_encoding_mblen_or_incomplete(encoding, source, remaining);
+
+ 		if (remaining < charlen)
+ 		{
+@@ -4118,7 +4118,8 @@ PQescapeInternal(PGconn *conn, const char *str, size_t len, bool as_ident)
+ 			int			charlen;
+
+ 			/* Slow path for possible multibyte characters */
+-			charlen = pg_encoding_mblen(conn->client_encoding, s);
++			charlen = pg_encoding_mblen_or_incomplete(conn->client_encoding,
++													  s, remaining);
+
+ 			if (charlen > remaining)
+ 			{
+diff --git a/src/interfaces/libpq/fe-misc.c b/src/interfaces/libpq/fe-misc.c
+index 488f7d6..02dd799 100644
+--- a/src/interfaces/libpq/fe-misc.c
++++ b/src/interfaces/libpq/fe-misc.c
+@@ -1165,13 +1165,9 @@ pqSocketPoll(int sock, int forRead, int forWrite, time_t end_time)
+  */
+
+ /*
+- * Returns the byte length of the character beginning at s, using the
+- * specified encoding.
+- *
+- * Caution: when dealing with text that is not certainly valid in the
+- * specified encoding, the result may exceed the actual remaining
+- * string length.  Callers that are not prepared to deal with that
+- * should use PQmblenBounded() instead.
++ * Like pg_encoding_mblen().  Use this in callers that want the
++ * dynamically-linked libpq's stance on encodings, even if that means
++ * different behavior in different startups of the executable.
+  */
+ int
+ PQmblen(const char *s, int encoding)
+@@ -1180,8 +1176,9 @@ PQmblen(const char *s, int encoding)
+ }
+
+ /*
+- * Returns the byte length of the character beginning at s, using the
+- * specified encoding; but not more than the distance to end of string.
++ * Like pg_encoding_mblen_bounded().  Use this in callers that want the
++ * dynamically-linked libpq's stance on encodings, even if that means
++ * different behavior in different startups of the executable.
+  */
+ int
+ PQmblenBounded(const char *s, int encoding)
+-- 
+2.45.2
+
@@ -3,15 +3,15 @@
 Summary:        PostgreSQL database engine
 Name:           postgresql
 Version:        16.7
-Release:        1%{?dist}
+Release:        2%{?dist}
 License:        PostgreSQL
 Vendor:         Microsoft Corporation
 Distribution:   Azure Linux
 Group:          Applications/Databases
 URL:            https://www.postgresql.org
 Source0:        https://ftp.postgresql.org/pub/source/v%{version}/%{name}-%{version}.tar.bz2
 Source1:	%{name}.service
-
+Patch0:         CVE-2025-4207.patch
 # Common libraries needed
 BuildRequires:  krb5-devel
 BuildRequires:  libxml2-devel
@@ -243,6 +243,9 @@ fi
 %{_unitdir}/%{name}.service
 
 %changelog
+* Wed May 14 2025 Jyoti Kanase <v-jykanase@microsoft.com> - 16.7-2
+- Patch CVE-2025-4207
+
 * Mon Feb 17 2025 CBL-Mariner Servicing Account <cblmargh@microsoft.com> - 16.7-1
 - Auto-upgrade to 16.7 - to fix CVE-2025-1094