Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Fixing hashing consistency across JVM instances.

Several of the core JRuby classes calculate hash codes based on Java or Ruby
object ids. This doesn't produce consistent hashing across JVM instances, which
is needed for distributed frameworks. For example, Hadoop uses hashCode values
to distribute keys from the map phase to the same reducer task (partitioning).

This commit adds hashCode (and Ruby's hash method) implementations for
RubyBoolean, RubyNil, and RubySymbol. RubyBoolean and RubyNil simply return
hard-coded hashCode values that were generated randomly once. This replaces
the default Java Object#hashCode.

For RubySymbol, the previous implementation of hashCode returned the symbol's
id, which could be different depending on the order in which symbols are
created. This updates it to calculate a hashCode based on the raw symbolBytes
like the RubyString implementation, but with a RubySymbol-specific seed and
without the encoding addition for 1.9. This value is calculated when symbols
are instantiated so the performance impact should be minimal.

This commit also adds a RubyInstanceConfig setting and CLI option for
consistent hashing, jruby.consistent.hashing.enabled, which controls whether
the Ruby runtime's hash seeds (k0 and k1) are generated randomly. When set to
true, they are set to static values. These hash seeds are used to hash
RubyString objects, so this will make string hash codes consistent across JVMs.

(later commit...)

Updating hashCode implementations.

Per discussion on the last commit's pull request [1], this updates the
implementations of hashCode for RubyNil and RubyBoolean. Now the hashCode
behavior for nil and booleans will only change when consistent hashing is
enabled. Adds a hashCode instance variable to RubyBoolean and RubyNil that is
set in the constructor to either the Object#hashCode value (obtained via
System.identityHashCode) or, when consistent hashing is enabled, a fixed value.

[1]: jruby#590
  • Loading branch information...
commit af1d38770b1ebc1811e66ea33d9814b82826867d 1 parent ed0a495
@rdblue rdblue authored
View
10 src/org/jruby/Ruby.java
@@ -216,8 +216,14 @@ private Ruby(RubyInstanceConfig config) {
myRandom = new Random();
}
this.random = myRandom;
- this.hashSeedK0 = this.random.nextLong();
- this.hashSeedK1 = this.random.nextLong();
+
+ if (RubyInstanceConfig.CONSISTENT_HASHING_ENABLED) {
+ this.hashSeedK0 = -561135208506705104l;
+ this.hashSeedK1 = 7114160726623585955l;
+ } else {
+ this.hashSeedK0 = this.random.nextLong();
+ this.hashSeedK1 = this.random.nextLong();
+ }
this.beanManager.register(new Config(this));
this.beanManager.register(parserStats);
View
24 src/org/jruby/RubyBoolean.java
@@ -46,13 +46,23 @@
*/
@JRubyClass(name={"TrueClass", "FalseClass"})
public class RubyBoolean extends RubyObject {
-
+
+ private final int hashCode;
+
RubyBoolean(Ruby runtime, boolean value) {
super(runtime,
(value ? runtime.getTrueClass() : runtime.getFalseClass()),
false); // Don't put in object space
if (!value) flags = FALSE_F;
+
+ if (RubyInstanceConfig.CONSISTENT_HASHING_ENABLED) {
+ // default to a fixed value
+ this.hashCode = value ? 155 : -48;
+ } else {
+ // save the object id based hash code;
+ this.hashCode = System.identityHashCode(this);
+ }
}
@Override
@@ -82,6 +92,7 @@ public static RubyClass createFalseClass(Ruby runtime) {
falseClass.setReifiedClass(RubyBoolean.class);
falseClass.defineAnnotatedMethods(False.class);
+ falseClass.defineAnnotatedMethods(RubyBoolean.class);
falseClass.getMetaClass().undefineMethod("new");
@@ -95,6 +106,7 @@ public static RubyClass createTrueClass(Ruby runtime) {
trueClass.setReifiedClass(RubyBoolean.class);
trueClass.defineAnnotatedMethods(True.class);
+ trueClass.defineAnnotatedMethods(RubyBoolean.class);
trueClass.getMetaClass().undefineMethod("new");
@@ -161,6 +173,16 @@ public static IRubyObject true_to_s(IRubyObject t) {
}
}
+ @JRubyMethod(name = "hash")
+ public RubyFixnum hash(ThreadContext context) {
+ return context.runtime.newFixnum(hashCode());
+ }
+
+ @Override
+ public int hashCode() {
+ return hashCode;
+ }
+
@Override
public RubyFixnum id() {
if ((flags & FALSE_F) == 0) {
View
8 src/org/jruby/RubyInstanceConfig.java
@@ -1653,6 +1653,14 @@ public boolean shouldPrecompileAll() {
public static final boolean COROUTINE_FIBERS = Options.FIBER_COROUTINES.load();
+ /**
+ * Whether to calculate consistent hashes across JVM instances, or to ensure
+ * unpredictable hash values using SecureRandom.
+ *
+ * Set with the <tt>jruby.consistent.hashing.enabled</tt> system property.
+ */
+ public static final boolean CONSISTENT_HASHING_ENABLED = Options.CONSISTENT_HASHING_ENABLED.load();
+
private static volatile boolean loadedNativeExtensions = false;
////////////////////////////////////////////////////////////////////////////
View
23 src/org/jruby/RubyNil.java
@@ -45,9 +45,20 @@
*/
@JRubyClass(name="NilClass")
public class RubyNil extends RubyObject {
+
+ private final int hashCode;
+
public RubyNil(Ruby runtime) {
super(runtime, runtime.getNilClass(), false);
flags |= NIL_F | FALSE_F;
+
+ if (RubyInstanceConfig.CONSISTENT_HASHING_ENABLED) {
+ // default to a fixed value
+ this.hashCode = 34;
+ } else {
+ // save the object id based hash code;
+ this.hashCode = System.identityHashCode(this);
+ }
}
public static final ObjectAllocator NIL_ALLOCATOR = new ObjectAllocator() {
@@ -181,7 +192,17 @@ public static RubyBoolean op_xor(IRubyObject recv, IRubyObject obj) {
public IRubyObject nil_p() {
return getRuntime().getTrue();
}
-
+
+ @JRubyMethod(name = "hash")
+ public RubyFixnum hash(ThreadContext context) {
+ return context.runtime.newFixnum(hashCode());
+ }
+
+ @Override
+ public int hashCode() {
+ return hashCode;
+ }
+
@Override
public RubyFixnum id() {
return getRuntime().newFixnum(4);
View
14 src/org/jruby/RubySymbol.java
@@ -65,15 +65,20 @@
import org.jruby.runtime.callsite.NormalCachingCallSite;
import org.jruby.runtime.marshal.UnmarshalStream;
import org.jruby.util.ByteList;
+import org.jruby.util.PerlHash;
+import org.jruby.util.SipHashInline;
/**
* Represents a Ruby symbol (e.g. :bar)
*/
@JRubyClass(name="Symbol")
public class RubySymbol extends RubyObject {
+ public static final long symbolHashSeedK0 = 5238926673095087190l;
+
private final String symbol;
private final int id;
private final ByteList symbolBytes;
+ private final int hashCode;
/**
*
@@ -99,6 +104,13 @@ private RubySymbol(Ruby runtime, String internedSymbol, ByteList symbolBytes) {
this.symbol = internedSymbol;
this.symbolBytes = symbolBytes;
this.id = runtime.allocSymbolId();
+
+ long hash = runtime.isSiphashEnabled() ? SipHashInline.hash24(
+ symbolHashSeedK0, 0, symbolBytes.getUnsafeBytes(),
+ symbolBytes.getBegin(), symbolBytes.getRealSize()) :
+ PerlHash.hash(symbolHashSeedK0, symbolBytes.getUnsafeBytes(),
+ symbolBytes.getBegin(), symbolBytes.getRealSize());
+ this.hashCode = (int) hash;
}
private RubySymbol(Ruby runtime, String internedSymbol) {
@@ -296,7 +308,7 @@ public RubyFixnum hash(ThreadContext context) {
@Override
public int hashCode() {
- return id;
+ return hashCode;
}
public int getId() {
View
1  src/org/jruby/util/cli/Options.java
@@ -213,6 +213,7 @@ public static String dump() {
public static final Option<Boolean> FIBER_COROUTINES = bool(MISCELLANEOUS, "fiber.coroutines", false, "Use JVM coroutines for Fiber.");
public static final Option<Boolean> GLOBAL_REQUIRE_LOCK = bool(MISCELLANEOUS, "global.require.lock", false, "Use a single global lock for requires.");
public static final Option<Boolean> NATIVE_EXEC = bool(MISCELLANEOUS, "native.exec", true, "Do a true process-obliterating native exec for Kernel#exec.");
+ public static final Option<Boolean> CONSISTENT_HASHING_ENABLED = bool(MISCELLANEOUS, "consistent.hashing.enabled", false, "Generate consistent object hashes across JVMs");
public static final Option<Boolean> DEBUG_LOADSERVICE = bool(DEBUG, "debug.loadService", false, "Log require/load file searches.");
public static final Option<Boolean> DEBUG_LOADSERVICE_TIMING = bool(DEBUG, "debug.loadService.timing", false, "Log require/load parse+evaluate times.");
View
14 test/org/jruby/test/TestRubyNil.java
@@ -38,6 +38,8 @@
import org.jruby.Ruby;
import org.jruby.RubyFixnum;
import org.jruby.RubyNil;
+import org.jruby.javasupport.util.RuntimeHelpers;
+import org.jruby.runtime.builtin.IRubyObject;
/**
* @author chadfowler
@@ -98,4 +100,16 @@ public void testOpXOr() {
assertTrue(RubyNil.op_xor(rubyNil, runtime.getTrue()).isTrue());
assertTrue(RubyNil.op_xor(rubyNil, runtime.getFalse()).isFalse());
}
+
+ public void testHash() {
+ IRubyObject hash = RuntimeHelpers.invoke(
+ runtime.getCurrentContext(), rubyNil, "hash");
+ assertEquals(RubyFixnum.newFixnum(
+ runtime, System.identityHashCode(rubyNil)), hash);
+ }
+
+ public void testHashCode() {
+ // should be the default Object#hashCode()
+ assertEquals(System.identityHashCode(rubyNil), rubyNil.hashCode());
+ }
}
View
9 test/org/jruby/test/TestRubySymbol.java
@@ -62,4 +62,13 @@ public void testSymbolTable() throws Exception {
assertSame(another, st.getSymbol("another_name"));
assertSame(another, st.fastGetSymbol("another_name"));
}
+
+ public void testSymbolHashCode() {
+ RubySymbol sym = RubySymbol.newSymbol(runtime, "somename");
+ assertTrue(sym.hashCode() != 0);
+ assertTrue(sym.hashCode() != sym.getId());
+ if (runtime.isSiphashEnabled()) {
+ assertEquals(1706472664, sym.hashCode());
+ }
+ }
}
Please sign in to comment.
Something went wrong with that request. Please try again.