Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

makes finagle-mysql charsets extensible #150

Closed
wants to merge 3 commits into from

2 participants

Moses Nakamura Ruben Oanta
Moses Nakamura
Collaborator

motivation

Some people have MySQL databases that use character encodings other than UTF-8. finagle-mysql supports very few encodings right now, but it should support more in the future.

implementation

This is a more complicated problem than you might think at first, because different columns can theoretically be different charsets within a single database.

todo

prepared statements

I only added support for Latin-1 in this PR, because it lets me punt on encoding prepared statements. For prepared statements, we don't need to declare anything about the charset to MySQL, but we do need to encode the string parameters ourselves. Because Latin-1 and UTF-8 agree on the ASCII range, this just assumes every string in a prepared statement can be encoded as UTF-8, which is only correct for ASCII text. More work will need to be done to guarantee that every string in a prepared statement is encoded properly.

other charsets

The other part of this that isn't complete is that the conversion from mysql collation number to a java.nio.charset is completely punted on, and it always returns utf-8. In the future, there should be a map from collations to charsets.

thanks

Thanks @roanta for bearing with me as I muddled through the finagle-mysql code, and for providing guidance.

Moses Nakamura
Collaborator

@roanta tells me that this has been merged internally. I think that only the commits up to twitter/finagle@a51ec07 were merged internally. I pushed to this branch afterwards, forgetting that it hadn't been pushed back into the open source repo yet.

I'm not sure what to do in this situation. Should I revert twitter/finagle@a2df331 on this branch? I have a new branch with a new pull request in #154.

Ruben Oanta
Collaborator

I've pulled this in along with #154. Should sync into the public repo shortly. Thanks!

Ruben Oanta roanta closed this
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Apr 1, 2013
  1. Moses Nakamura
Commits on Apr 2, 2013
  1. Moses Nakamura

    cleans up some detritus

    mosesn authored
Commits on Apr 8, 2013
  1. Moses Nakamura
This page is out of date. Refresh to see the latest.
8 finagle-mysql/src/main/scala/com/twitter/finagle/mysql/Codec.scala
View
@@ -54,14 +54,14 @@ class AuthenticationProxy(
extends ServiceFactoryProxy(underlying) {
def makeLoginReq(sg: ServersGreeting) =
- LoginRequest(username, password, database, clientCap, sg.salt, sg.serverCap)
+ LoginRequest(username, password, database, clientCap, sg.salt, sg.serverCap, sg.charset)
def acceptGreeting(res: Result) = res match {
case sg: ServersGreeting if !sg.serverCap.has(Capability.Protocol41) =>
Future.exception(IncompatibleServer("This client is only compatible with MySQL version 4.1 and later."))
- case sg: ServersGreeting if !Charset.isUTF8(sg.charset) =>
- Future.exception(IncompatibleServer("This client is only compatible with UTF-8 charset encoding."))
+ case sg: ServersGreeting if !Charset.isCompatible(sg.charset) =>
+ Future.exception(IncompatibleServer("This client is only compatible with UTF-8 and Latin-1 charset encoding."))
case sg: ServersGreeting =>
Future.value(sg)
@@ -85,4 +85,4 @@ class AuthenticationProxy(
loginRes <- service(makeLoginReq(sg))
_ <- acceptLogin(loginRes)
} yield service
-}
+}
3  finagle-mysql/src/main/scala/com/twitter/finagle/mysql/Request.scala
View
@@ -110,6 +110,7 @@ case class ExecuteRequest(ps: PreparedStatement, flags: Byte = 0, iterationCount
* Writes the parameter into its MySQL binary representation.
*/
private[this] def writeParam(param: Any, writer: BufferWriter) = param match {
+ // TODO: defaults to UTF-8. this is ok for ascii, not for every encoding.
case s: String => writer.writeLengthCodedString(s)
case b: Boolean => writer.writeBoolean(b)
case b: Byte => writer.writeByte(b)
@@ -164,4 +165,4 @@ case class CloseRequest(ps: PreparedStatement) extends CommandRequest(Command.CO
bw.writeByte(cmd).writeInt(ps.statementId)
bw.toChannelBuffer
}
-}
+}
35 finagle-mysql/src/main/scala/com/twitter/finagle/mysql/ResultSet.scala
View
@@ -1,6 +1,6 @@
package com.twitter.finagle.exp.mysql
-import com.twitter.finagle.exp.mysql.protocol.{BufferReader, Packet}
+import com.twitter.finagle.exp.mysql.protocol.{BufferReader, Packet, Charset}
import java.sql.{Timestamp, Date => SQLDate}
trait ResultSet extends Result {
@@ -84,9 +84,9 @@ class StringEncodedRow(row: Array[Byte], val fields: Seq[Field], indexMap: Map[S
* Convert the string representation of each value
* into an appropriate Value object.
*/
- val values: IndexedSeq[Value] = for (idx <- 0 until fields.size) yield {
- Value(fields(idx).fieldType, br.readLengthCodedString())
- }
+ val values: IndexedSeq[Value] = (for (field: Field <- fields.toIndexedSeq) yield {
+ Value(field.fieldType, br.readLengthCodedString(Charset(field.charset)))
+ })
def indexOf(name: String) = indexMap.get(name)
}
@@ -116,12 +116,12 @@ class BinaryEncodedRow(row: Array[Byte], val fields: Seq[Field], indexMap: Map[S
* Convert the binary representation of each value
* into an appropriate Value object.
*/
- val values: IndexedSeq[Value] = for (idx <- 0 until fields.size) yield {
+ val values: IndexedSeq[Value] = (for ((field, idx) <- fields.toIndexedSeq.zipWithIndex) yield {
if (isNull(idx))
NullValue
else
- Value(fields(idx).fieldType, buffer)
- }
+ Value(field.fieldType, buffer, Charset(field.charset))
+ })
def indexOf(name: String) = indexMap.get(name)
}
@@ -148,14 +148,21 @@ case class Field(
object Field {
def decode(packet: Packet): Field = {
val br = BufferReader(packet.body)
- val catalog = br.readLengthCodedString()
- val db = br.readLengthCodedString()
- val table = br.readLengthCodedString()
- val origTable = br.readLengthCodedString()
- val name = br.readLengthCodedString()
- val origName = br.readLengthCodedString()
+ val bytesCatalog = br.readLengthCodedBytes()
+ val bytesDb = br.readLengthCodedBytes()
+ val bytesTable = br.readLengthCodedBytes()
+ val bytesOrigTable = br.readLengthCodedBytes()
+ val bytesName = br.readLengthCodedBytes()
+ val bytesOrigName = br.readLengthCodedBytes()
br.skip(1) // filler
val charset = br.readShort()
+ val jCharset = Charset(charset)
+ val catalog = new String(bytesCatalog, jCharset)
+ val db = new String(bytesDb, jCharset)
+ val table = new String(bytesTable, jCharset)
+ val origTable = new String(bytesOrigTable, jCharset)
+ val name = new String(bytesName, jCharset)
+ val origName = new String(bytesOrigName, jCharset)
val length = br.readInt()
val fieldType = br.readUnsignedByte()
val flags = br.readShort()
@@ -174,4 +181,4 @@ object Field {
decimals
)
}
-}
+}
11 finagle-mysql/src/main/scala/com/twitter/finagle/mysql/Value.scala
View
@@ -3,6 +3,7 @@ package com.twitter.finagle.exp.mysql
import com.twitter.finagle.exp.mysql.protocol.{BufferReader, BufferWriter, SQLZeroDate, SQLZeroTimestamp, Type}
import java.sql.{Timestamp, Date => SQLDate}
import java.util.Calendar
+import java.nio.charset.{Charset => JCharset}
/**
* Defines a Value ADT that represents values
@@ -68,10 +69,10 @@ object Value {
* and a byte buffer. If the mapping is unknown
* a RawBinaryValue is returned.
*/
- def apply(typeCode: Int, buffer: BufferReader) = typeCode match {
- case Type.STRING => StringValue(buffer.readLengthCodedString())
- case Type.VAR_STRING => StringValue(buffer.readLengthCodedString())
- case Type.VARCHAR => StringValue(buffer.readLengthCodedString())
+ def apply(typeCode: Int, buffer: BufferReader, charset: JCharset) = typeCode match {
+ case Type.STRING => StringValue(buffer.readLengthCodedString(charset))
+ case Type.VAR_STRING => StringValue(buffer.readLengthCodedString(charset))
+ case Type.VARCHAR => StringValue(buffer.readLengthCodedString(charset))
case Type.TINY => ByteValue(buffer.readByte())
case Type.SHORT => ShortValue(buffer.readShort())
case Type.INT24 => IntValue(buffer.readInt24())
@@ -240,4 +241,4 @@ object DateValue {
buffer.writeByte(cal.get(Calendar.DATE))
buffer
}
-}
+}
50 finagle-mysql/src/main/scala/com/twitter/finagle/mysql/protocol/Buffer.scala
View
@@ -5,6 +5,7 @@ import java.nio.ByteOrder
import java.nio.charset.{Charset => JCharset}
import org.jboss.netty.buffer.ChannelBuffer
import org.jboss.netty.buffer.ChannelBuffers._
+import scala.collection.mutable.{Buffer => SBuffer}
/**
* The BufferReader and BufferWriter interfaces provide methods for
@@ -133,29 +134,40 @@ trait BufferReader {
/**
* Reads a null-terminated string where
- * null is denoted by '\0'. Uses Charset.defaultCharset
+ * null is denoted by '\0'. Uses Charset.defaultCharset by default
* to decode strings.
* @return a null-terminated String starting at offset.
*/
- def readNullTerminatedString(): String = {
+ def readNullTerminatedString(charset: JCharset = Charset.defaultCharset): String = {
val start = offset
var length = 0
while (readByte() != 0x00)
length += 1
- this.toString(start, length, Charset.defaultCharset)
+ this.toString(start, length, charset)
+ }
+
+ /**
+ * Reads a null-terminated array where
+ * null is denoted by '\0'.
+ * @return the bytes preceding the null terminator, starting at offset.
+ */
+ def readNullTerminatedBytes(): Array[Byte] = {
+ val cur: SBuffer[Byte] = SBuffer()
+ do cur += readByte() while (cur.last != 0x00)
+ cur.init.toArray
}
/**
* Reads a length encoded string according to the MySQL
- * Client/Server protocol. Uses Charset.defaultCharset to
- * decode strings. For more details refer to MySQL
+ * Client/Server protocol. Uses Charset.defaultCharset by default
+ * to decode strings. For more details refer to MySQL
* documentation.
* @return a MySQL length coded String starting at
* offset.
*/
- def readLengthCodedString(): String = {
+ def readLengthCodedString(charset: JCharset = Charset.defaultCharset): String = {
val length = readLengthCodedBinary()
if (length == Buffer.NULL_LENGTH)
null
@@ -164,7 +176,7 @@ trait BufferReader {
else {
val start = offset
skip(length)
- this.toString(start, length, Charset.defaultCharset)
+ this.toString(start, length, charset)
}
}
@@ -346,25 +358,31 @@ trait BufferWriter {
/**
* Writes a null terminated string onto the buffer where
- * '\0' denotes null. Uses Charset.defaultCharset to decode the given
- * String.
+ * '\0' denotes null. Uses Charset.defaultCharset by default
+ * to encode the given String.
* @param s String to write.
*/
- def writeNullTerminatedString(s: String): BufferWriter = {
- writeBytes(s.getBytes(Charset.defaultCharset))
+ def writeNullTerminatedString(
+ s: String,
+ charset: JCharset = Charset.defaultCharset
+ ): BufferWriter = {
+ writeBytes(s.getBytes(charset))
writeByte('\0')
this
}
/**
* Writes a length coded string using the MySQL Client/Server
- * protocol. Uses Charset.defaultCharset to decode the given
- * String.
+ * protocol. Uses Charset.defaultCharset by default to encode
+ * the given String.
* @param s String to write to buffer.
*/
- def writeLengthCodedString(s: String): BufferWriter = {
+ def writeLengthCodedString(
+ s: String,
+ charset: JCharset = Charset.defaultCharset
+ ): BufferWriter = {
writeLengthCodedBinary(s.length)
- writeBytes(s.getBytes(Charset.defaultCharset))
+ writeBytes(s.getBytes(charset))
this
}
@@ -468,4 +486,4 @@ object BufferWriter {
def toChannelBuffer = underlying
}
-}
+}
41 finagle-mysql/src/main/scala/com/twitter/finagle/mysql/protocol/Charset.scala
View
@@ -3,6 +3,17 @@ package com.twitter.finagle.exp.mysql.protocol
import java.nio.charset.{Charset => JCharset}
object Charset {
+
+ /**
+ * Converts from mysql charset to java charset.
+ */
+ def apply(charset: Short): JCharset = if (isUtf8(charset) || isBinary(charset))
+ JCharset.forName("UTF-8")
+ else if (isLatin1(charset))
+ JCharset.forName("ISO-8859-1")
+ else
+ throw new IllegalArgumentException("Charset %d is not supported.".format(charset))
+
/**
* Default Charset to use when decoding strings.
*/
@@ -11,6 +22,14 @@ object Charset {
/**
* MySQL Latin-1 and UTF-8 Collations.
*/
+ val Latin1_bin = 47.toShort
+ val Latin1_danish_ci = 15.toShort
+ val Latin1_general_ci = 48.toShort
+ val Latin1_general_cs = 49.toShort
+ val Latin1_german1_c1 = 5.toShort
+ val Latin1_german2_ci = 31.toShort
+ val Latin1_spanish_ci = 94.toShort
+ val Latin1_swedish_c1 = 8.toShort
val Utf8_bin = 83.toShort
val Utf8_czech_ci = 202.toShort
val Utf8_danish_ci = 203.toShort
@@ -34,6 +53,17 @@ object Charset {
val Utf8_turkish_ci = 201.toShort
val Utf8_unicode_ci = 192.toShort
+ private[this] val Latin1Set = Set(
+ Latin1_bin,
+ Latin1_danish_ci,
+ Latin1_general_ci,
+ Latin1_general_cs,
+ Latin1_german1_c1,
+ Latin1_german2_ci,
+ Latin1_spanish_ci,
+ Latin1_swedish_c1
+ )
+
private[this] val Utf8Set = Set(
Utf8_bin,
Utf8_czech_ci,
@@ -59,5 +89,12 @@ object Charset {
Utf8_unicode_ci
)
- def isUTF8(code: Short) = Utf8Set.contains(code)
-}
+ private[this] val Binary = 63.toShort
+
+ private[this] val CompatibleSet = Latin1Set ++ Utf8Set
+
+ def isCompatible(code: Short): Boolean = CompatibleSet(code)
+ def isUtf8(code: Short): Boolean = Utf8Set(code)
+ def isLatin1(code: Short): Boolean = Latin1Set(code)
+ def isBinary(code: Short): Boolean = code == Binary
+}
12 finagle-mysql/src/main/scala/com/twitter/finagle/mysql/protocol/Handshake.scala
View
@@ -3,6 +3,7 @@ package com.twitter.finagle.exp.mysql.protocol
import com.twitter.finagle.exp.mysql.protocol.Capability._
import com.twitter.finagle.exp.mysql.{Result, Request}
import java.security.MessageDigest
+import java.nio.charset.{Charset => JCharset}
/**
* Initial Result received from server during handshaking.
@@ -21,12 +22,13 @@ object ServersGreeting {
def decode(packet: Packet): ServersGreeting = {
val br = BufferReader(packet.body)
val protocol = br.readByte()
- val version = br.readNullTerminatedString()
+ val bytesVersion = br.readNullTerminatedBytes()
val threadId = br.readInt()
val salt1 = br.take(8)
br.skip(1) // 1 filler byte always 0x00
val serverCap = Capability(br.readUnsignedShort())
val charset = br.readUnsignedByte()
+ val version = new String(bytesVersion, Charset(charset))
val status = br.readShort()
br.skip(13)
val salt2 = br.take(12)
@@ -68,17 +70,17 @@ case class LoginRequest(
bw.writeInt(maxPacket)
bw.writeByte(charset)
bw.fill(23, 0.toByte) // 23 reserved bytes - zeroed out
- bw.writeNullTerminatedString(username)
+ bw.writeNullTerminatedString(username, Charset(charset))
bw.writeLengthCodedBytes(hashPassword)
if (newClientCap.has(ConnectWithDB) && serverCap.has(ConnectWithDB))
- bw.writeNullTerminatedString(database.get)
+ bw.writeNullTerminatedString(database.get, Charset(charset))
bw.toChannelBuffer
}
private[this] def encryptPassword(password: String, salt: Array[Byte]) = {
val md = MessageDigest.getInstance("SHA-1")
- val hash1 = md.digest(password.getBytes(Charset.defaultCharset.displayName))
+ val hash1 = md.digest(password.getBytes(Charset(charset).displayName))
md.reset()
val hash2 = md.digest(hash1)
md.reset()
@@ -91,4 +93,4 @@ case class LoginRequest(
}
digest
}
-}
+}
Something went wrong with that request. Please try again.