Skip to content

Commit

Permalink
add a sqlite3 extension for calculating hamming distance in the db
Browse files Browse the repository at this point in the history
  • Loading branch information
tenderlove committed Dec 18, 2013
1 parent 82919e1 commit 98f22a4
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 0 deletions.
2 changes: 2 additions & 0 deletions ext/phashion_ext/extconf.rb
Expand Up @@ -43,4 +43,6 @@
$LIBS = " -lpthread -lpHash_gem -lstdc++ -ljpeg -lpng"
end

# Probe for the SQLite loadable-extension header. mkmf defines
# HAVE_SQLITE3EXT_H when it is found, and the C source only compiles its
# SQLite extension code under that macro.
have_header('sqlite3ext.h')

# Must run after every probe so the generated Makefile carries the results.
create_makefile('phashion_ext')
48 changes: 48 additions & 0 deletions ext/phashion_ext/phashion_ext.c
Expand Up @@ -30,6 +30,54 @@ extern "C" {
rb_define_singleton_method(c, "hamming_distance", (VALUE(*)(ANYARGS))hamming_distance, 2);
rb_define_singleton_method(c, "image_hash_for", (VALUE(*)(ANYARGS))image_hash_for, 1);
}

#ifdef HAVE_SQLITE3EXT_H
#include <sqlite3ext.h>

SQLITE_EXTENSION_INIT1

/*
 * SQL scalar function: hamming_distance(left_hi, left_lo, right_hi, right_lo).
 *
 * Each 64-bit perceptual hash is passed as two integer arguments (high half,
 * then low half). The halves are recombined as (hi << 32) + lo and the
 * result of ph_hamming_distance() on the two hashes is returned as an SQL
 * integer.
 *
 * Any argument whose SQLite type is not INTEGER is treated as 0, so a call
 * made entirely with non-integer arguments yields a distance of 0.
 *
 * ctx  - SQLite function context that receives the result.
 * argc - argument count; unused because exactly four values are read
 *        (the function is registered with an arity of 4).
 * argv - the four argument values.
 */
static void hamming_distance(sqlite3_context * ctx, int argc, sqlite3_value **argv)
{
  ulong64 halves[4];
  ulong64 left, right;
  int i;

  (void)argc;

  for (i = 0; i < 4; i++) {
    if (SQLITE_INTEGER == sqlite3_value_type(argv[i])) {
      /* Convert to unsigned right away: shifting a signed 64-bit value
       * whose bit 31 is set left by 32 overflows, which is undefined
       * behaviour for signed integers. Unsigned arithmetic wraps and
       * yields the intended bit pattern. */
      halves[i] = (ulong64)sqlite3_value_int64(argv[i]);
    } else {
      halves[i] = 0;
    }
  }

  left  = (halves[0] << 32) + halves[1];
  right = (halves[2] << 32) + halves[3];

  sqlite3_result_int(ctx, ph_hamming_distance(left, right));
}

/*
 * Extension entry point.
 *
 * Runs SQLITE_EXTENSION_INIT2 on the supplied API routines and then
 * registers the 4-argument SQL function "hamming_distance" (UTF-8 text
 * encoding, no user data, scalar callback only) on db.
 *
 * db       - connection the function is registered on.
 * pzErrMsg - not used by this function.
 * pApi     - API routines handed to SQLITE_EXTENSION_INIT2.
 *
 * Always returns SQLITE_OK.
 * NOTE(review): the return value of sqlite3_create_function is discarded,
 * so a failed registration is not reported to the caller.
 */
int sqlite3_phashionext_init(

This comment has been minimized.

Copy link
@westonplatter

westonplatter Oct 29, 2014

Owner

@tenderlove - changed the name of this to make tests pass - this may break your code.

sqlite3 *db,
char **pzErrMsg,
const sqlite3_api_routines *pApi
){
SQLITE_EXTENSION_INIT2(pApi);

sqlite3_create_function(
db,
"hamming_distance",
4,
SQLITE_UTF8,
NULL,
hamming_distance,
NULL,
NULL
);
return SQLITE_OK;
}

#endif

#ifdef __cplusplus
}
#endif
6 changes: 6 additions & 0 deletions lib/phashion.rb
Expand Up @@ -6,6 +6,8 @@
# int ph_dct_imagehash(const char *file, ulong64 &hash);
# int ph_hamming_distance(ulong64 hasha, ulong64 hashb);

require 'rbconfig'

module Phashion
VERSION = '1.0.6'

Expand All @@ -28,6 +30,10 @@ def fingerprint
end
end

# Path of the compiled phashion_ext library, which sits in the same
# directory as this file. The file extension is the platform's
# dynamic-library suffix as reported by RbConfig.
def self.so_file
  here = File.dirname(__FILE__)
  File.join(here, "phashion_ext.#{RbConfig::CONFIG['DLEXT']}")
end
end

require 'phashion_ext'
48 changes: 48 additions & 0 deletions test/test_phashion.rb
@@ -1,7 +1,55 @@
require 'helper'
require 'sqlite3'

class TestPhashion < Test::Unit::TestCase

# Breaks a 64-bit hash into its 32-bit halves.
#
# Returns a two-element array: [upper 32 bits, lower 32 bits].
def split(hash)
  mask = 0xFFFFFFFF
  [(hash >> 32) & mask, hash & mask]
end

# Calling the SQL hamming_distance function with four string arguments is
# expected to produce a single row containing 0.
def test_db_bad_arg
  db = SQLite3::Database.new(':memory:')

  # Nothing to check when this sqlite3 binding cannot load extensions.
  if db.respond_to?(:enable_load_extension)
    db.enable_load_extension(true)
    db.load_extension(Phashion.so_file)

    rows = db.execute("SELECT hamming_distance('foo', 'bar', 'baz', 'zot')")
    assert_equal([[0]], rows)
  end
end

# Stores one image fingerprint in SQLite as two 32-bit halves, then checks
# that the SQL hamming_distance function returns the same distance to a
# second fingerprint as Phashion.hamming_distance does in Ruby.
def test_db_extension
  db = SQLite3::Database.new(':memory:')
  # Nothing to check when this sqlite3 binding cannot load extensions.
  return unless db.respond_to?(:enable_load_extension)

  db.enable_load_extension(true)
  db.load_extension(Phashion.so_file)

  db.execute(<<-eosql)
CREATE TABLE "images" (
"id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
"fingerprint_l" integer NOT NULL,
"fingerprint_r" integer NOT NULL)
eosql

  here = File.dirname(__FILE__)
  stored_hash = Phashion.image_hash_for(here + '/jpg/Broccoli_Super_Food.jpg')
  probe_hash = Phashion.image_hash_for(here + '/png/Broccoli_Super_Food.png')

  # The stored fingerprint goes into the table as (high half, low half).
  high, low = split(stored_hash)
  db.execute("INSERT INTO images (fingerprint_l, fingerprint_r) VALUES (#{high}, #{low})")

  expected = Phashion.hamming_distance(stored_hash, probe_hash)

  # The probe fingerprint is passed inline as the third and fourth arguments.
  high, low = split(probe_hash)
  rows = db.execute("SELECT hamming_distance(fingerprint_l, fingerprint_r, #{high}, #{low}) FROM images")
  assert_equal(expected, rows.first.first)
end

def test_duplicate_detection
files = %w(86x86-0a1e.jpeg 86x86-83d6.jpeg 86x86-a855.jpeg)
images = files.map {|f| Phashion::Image.new("#{File.dirname(__FILE__) + '/../test/jpg/'}#{f}")}
Expand Down

0 comments on commit 98f22a4

Please sign in to comment.