Skip to content

Commit

Permalink
This effectively merges @tenderlove's text hashing PR
Browse files Browse the repository at this point in the history
#36

Conflicts:
	ext/phashion_ext/phashion_ext.c
	lib/phashion.rb
	phashion.gemspec
  • Loading branch information
westonplatter committed Aug 21, 2014
2 parents 97916a4 + 3bd059d commit 72dc66a
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 0 deletions.
84 changes: 84 additions & 0 deletions ext/phashion_ext/phashion_ext.c
Expand Up @@ -98,6 +98,86 @@ static VALUE mh_hash_for(VALUE self, VALUE filename, VALUE alpha, VALUE lvl) {
return array;
}

/*
 * Computes the pHash text hash of a file and wraps each resulting point in a
 * TextHashPoint object.
 *
 * self - receiver; the TextHashPoint constant is looked up on it.
 * file - Ruby String naming the file to hash.
 *
 * Returns a Ruby Array with one TextHashPoint (built from hash, index) per
 * point reported by ph_texthash. The Array is empty when ph_texthash returns
 * no buffer or reports no points.
 *
 * Raises TypeError if file cannot be used as a String and ArgumentError if it
 * contains a NUL byte (both raised by StringValueCStr).
 */
static VALUE texthash_for(VALUE self, VALUE file) {
    /* Zero-initialised so the count is well defined even if ph_texthash
     * returns without writing to it. */
    int nbpoints = 0;
    int i;
    VALUE list;
    VALUE point_class;
    const char *path;
    TxtHashPoint *points;

    /* Resolve the constant and validate the path before pHash allocates
     * anything: both calls can raise, and a Ruby exception raised after the
     * allocation would jump past the free() below. */
    point_class = rb_const_get(self, rb_intern("TextHashPoint"));

    /* StringValueCStr (unlike StringValuePtr) guarantees a NUL-terminated
     * buffer and rejects embedded NUL bytes, which is what a C path needs. */
    path = StringValueCStr(file);

    points = ph_texthash(path, &nbpoints);

    if (points == NULL || nbpoints <= 0) {
        free(points); /* free(NULL) is a no-op */
        return rb_ary_new();
    }

    list = rb_ary_new2((long)nbpoints);

    for (i = 0; i < nbpoints; i++) {
        VALUE args[2];

        args[0] = ULL2NUM(points[i].hash);
        args[1] = ULL2NUM(points[i].index);

        rb_ary_push(list, rb_class_new_instance(2, args, point_class));
    }

    /* The buffer is released with free(), as in the original code;
     * presumably ph_texthash allocates it with malloc - confirm against pHash. */
    free(points);

    return list;
}

/* Arguments passed through rb_protect to fill_phash_points. */
struct phash_fill_args {
    VALUE list;           /* Ruby Array being converted */
    TxtHashPoint *points; /* destination buffer holding `count` entries */
    long count;           /* number of entries to fill */
};

/* Copies hash/index of the first `count` list elements into the buffer; may raise. */
static VALUE fill_phash_points(VALUE raw) {
    struct phash_fill_args *fill = (struct phash_fill_args *)raw;
    long i;

    for (i = 0; i < fill->count; i++) {
        VALUE elem = rb_ary_entry(fill->list, i);

        fill->points[i].hash = NUM2ULL(rb_funcall(elem, rb_intern("hash"), 0));
        /* texthash_for exports index with ULL2NUM, so NUM2INT could raise
         * RangeError on values this extension itself produced.
         * NOTE(review): assumes TxtHashPoint.index is an off_t, as declared
         * in pHash.h - confirm. */
        fill->points[i].index = NUM2OFFT(rb_funcall(elem, rb_intern("index"), 0));
    }

    return Qnil;
}

/*
 * Converts a Ruby Array of objects responding to #hash and #index into a
 * newly allocated C array of TxtHashPoint.
 *
 * list - Ruby Array of point-like objects.
 *
 * Returns a buffer with one TxtHashPoint per element of list (measured when
 * the function is entered). The caller owns the buffer and must release it
 * with xfree().
 *
 * Raises TypeError if list is not an Array. Any exception raised while
 * reading or converting an element is re-raised after the buffer is freed.
 */
static TxtHashPoint * rb2phash_points(VALUE list) {
    struct phash_fill_args fill;
    int state = 0;

    /* RARRAY_LEN is only meaningful on a real Array. */
    Check_Type(list, T_ARRAY);

    fill.list = list;
    /* Read the length once: #hash/#index run arbitrary Ruby code that could
     * grow the list, and the buffer is sized for this count only. */
    fill.count = RARRAY_LEN(list);
    fill.points = (TxtHashPoint *)xcalloc(fill.count, sizeof(TxtHashPoint));

    /* Run the conversions under rb_protect so the buffer is released if any
     * rb_funcall or numeric conversion raises. */
    rb_protect(fill_phash_points, (VALUE)&fill, &state);
    if (state) {
        xfree(fill.points);
        rb_jump_tag(state);
    }

    return fill.points;
}

/* rb_protect-compatible wrapper around rb2phash_points; returns the buffer cast to VALUE. */
static VALUE rb2phash_points_protected(VALUE list) {
    return (VALUE)rb2phash_points(list);
}

/*
 * Compares two text hashes and reports the matching runs.
 *
 * self  - receiver; the TextMatch constant is looked up on it.
 * list1 - Ruby Array of point-like objects (responding to #hash and #index).
 * list2 - Ruby Array of point-like objects (responding to #hash and #index).
 *
 * Returns a Ruby Array with one TextMatch (built from first_index,
 * second_index, length) per match reported by ph_compare_text_hashes. The
 * Array is empty when pHash returns no buffer or reports no matches.
 *
 * Raises TypeError if either list is not an Array, RangeError if either list
 * is too long to be described by a C int, and whatever rb2phash_points raises
 * while converting an element.
 */
static VALUE textmatches_for(VALUE self, VALUE list1, VALUE list2) {
    /* Zero-initialised so the count is well defined even if
     * ph_compare_text_hashes returns without writing to it. */
    int nbmatches = 0;
    int len1, len2, i;
    int state = 0;
    VALUE list;
    VALUE match_class;
    TxtHashPoint *txt_list1;
    TxtHashPoint *txt_list2;
    TxtMatch *matches;

    /* Everything that can raise cheaply happens before any C allocation. */
    match_class = rb_const_get(self, rb_intern("TextMatch"));

    Check_Type(list1, T_ARRAY);
    Check_Type(list2, T_ARRAY);

    /* The lengths are handed to pHash in place of the raw long RARRAY_LEN;
     * rb_long2int raises RangeError instead of silently truncating. */
    len1 = rb_long2int(RARRAY_LEN(list1));
    len2 = rb_long2int(RARRAY_LEN(list2));

    txt_list1 = rb2phash_points(list1);

    /* Converting list2 can raise; protect the call so txt_list1 is not
     * leaked when that happens. */
    txt_list2 = (TxtHashPoint *)rb_protect(rb2phash_points_protected, list2, &state);
    if (state) {
        xfree(txt_list1);
        rb_jump_tag(state);
    }

    matches = ph_compare_text_hashes(txt_list1, len1,
                                     txt_list2, len2,
                                     &nbmatches);

    xfree(txt_list1);
    xfree(txt_list2);

    if (matches == NULL || nbmatches <= 0) {
        free(matches); /* free(NULL) is a no-op */
        return rb_ary_new();
    }

    list = rb_ary_new2((long)nbmatches);

    for (i = 0; i < nbmatches; i++) {
        VALUE args[3];

        /* NOTE(review): assumes first_index/second_index are off_t and
         * length is an unsigned 32-bit integer, as declared in pHash.h -
         * confirm. INT2NUM would narrow values that do not fit in an int. */
        args[0] = OFFT2NUM(matches[i].first_index);
        args[1] = OFFT2NUM(matches[i].second_index);
        args[2] = UINT2NUM(matches[i].length);

        rb_ary_push(list, rb_class_new_instance(3, args, match_class));
    }

    /* The buffer is released with free(), as in the original code;
     * presumably pHash allocates it with malloc - confirm against pHash. */
    free(matches);

    return list;
}

#ifdef __cplusplus
extern "C" {
#endif
Expand All @@ -107,8 +187,12 @@ extern "C" {

rb_define_singleton_method(c, "hamming_distance", (VALUE(*)(ANYARGS))hamming_distance, 2);
rb_define_singleton_method(c, "image_hash_for", (VALUE(*)(ANYARGS))image_hash_for, 1);

rb_define_singleton_method(c, "_mh_hash_for", (VALUE(*)(ANYARGS))mh_hash_for, 3);
rb_define_singleton_method(c, "hamming_distance2", (VALUE(*)(ANYARGS))hamming_distance2, 2);

rb_define_singleton_method(c, "texthash_for", (VALUE(*)(ANYARGS))texthash_for, 1);
rb_define_singleton_method(c, "textmatches_for", (VALUE(*)(ANYARGS))textmatches_for, 2);
}

#ifdef HAVE_SQLITE3EXT_H
Expand Down
3 changes: 3 additions & 0 deletions lib/phashion.rb
Expand Up @@ -9,6 +9,9 @@
require 'rbconfig'

module Phashion
TextHashPoint = Struct.new :hash, :index
TextMatch = Struct.new :first_index, :second_index, :length

class Image
DEFAULT_DUPE_THRESHOLD = 15

Expand Down
21 changes: 21 additions & 0 deletions test/test_phashion.rb
@@ -1,8 +1,29 @@
require 'helper'
require 'sqlite3'
require 'tempfile'

class TestPhashion < Minitest::Test

# Writes a small text file, hashes it twice, and checks that both hashes are
# non-empty lists of TextHashPoint and that comparing them yields a non-empty
# list of TextMatch.
def test_text_hash
  matches = Tempfile.open('foo') do |file|
    100.times do |n|
      file.write "hello world #{n}"
    end
    # Close the handle before the extension reads the file by path.
    file.close

    first_points = Phashion.texthash_for file.path
    second_points = Phashion.texthash_for file.path
    both = [first_points, second_points]

    both.each do |points|
      assert_operator points.length, :>, 0
    end
    both.each do |points|
      points.each { |point| assert_instance_of Phashion::TextHashPoint, point }
    end

    # The block's value becomes the value of Tempfile.open.
    Phashion.textmatches_for(first_points, second_points)
  end

  assert_operator matches.length, :>, 0
  matches.each do |found|
    assert_instance_of Phashion::TextMatch, found
  end
end

def split(hash)
r = hash & 0xFFFFFFFF
l = (hash >> 32) & 0xFFFFFFFF
Expand Down

0 comments on commit 72dc66a

Please sign in to comment.