Skip to content

Commit

Permalink
fix part compute for Chinese (#30)
Browse files Browse the repository at this point in the history
* fix part compute for Chinese

* add comment
  • Loading branch information
Nicole00 committed Dec 1, 2021
1 parent 9c79a1b commit cd851ea
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

package com.vesoft.nebula.exchange.utils

import java.nio.charset.Charset

import com.google.common.primitives.UnsignedLong
import com.vesoft.nebula.exchange.{MetaProvider, VidType}
import com.vesoft.nebula.exchange.config.{SchemaConfigEntry, Type}
Expand Down Expand Up @@ -84,7 +86,9 @@ object NebulaUtils {

def getPartitionId(id: String, partitionSize: Int, vidType: VidType.Value): Int = {
val hashValue: Long = if (vidType == VidType.STRING) {
MurmurHash2.hash64(id.getBytes, id.length, 0xc70f6907)
// TODO: the charset must be the same as the one used by the Nebula space
val byteId = id.getBytes(Charset.forName("UTF-8"))
MurmurHash2.hash64(byteId, byteId.length, 0xc70f6907)
} else {
id.toLong
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,15 @@ class NebulaUtilsSuite {
assert(NebulaUtils.getPartitionId("-1", 10, VidType.INT) == 6)
assert(NebulaUtils.getPartitionId("-2", 10, VidType.INT) == 5)
assert(NebulaUtils.getPartitionId("-3", 10, VidType.INT) == 4)

// string IDs containing Chinese characters
assert(NebulaUtils.getPartitionId("中文", 10, VidType.STRING) == 5)
assert(NebulaUtils.getPartitionId("北京", 10, VidType.STRING) == 7)
assert(NebulaUtils.getPartitionId("北京123", 10, VidType.STRING) == 1)
assert(NebulaUtils.getPartitionId("北A12ABC", 10, VidType.STRING) == 3)
assert(NebulaUtils.getPartitionId("蒙DPP8EC", 10, VidType.STRING) == 4)
assert(NebulaUtils.getPartitionId("赣F6893_Vehicle", 10, VidType.STRING) == 2)
assert(NebulaUtils.getPartitionId("湘3Z4A1E_vehicle", 10, VidType.STRING) == 1)
}

@Test
Expand Down

0 comments on commit cd851ea

Please sign in to comment.