Skip to content

Commit

Permalink
Dev release 0.41_01 Includes support for max_distance. Partially setup to return a list if wantarray
Browse files Browse the repository at this point in the history
  • Loading branch information
ugexe committed Oct 12, 2014
1 parent 5239a38 commit d5dd78d
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 75 deletions.
129 changes: 65 additions & 64 deletions XS.xs
Original file line number Diff line number Diff line change
Expand Up @@ -11,82 +11,83 @@ MODULE = Text::Levenshtein::XS PACKAGE = Text::Levenshtein::XS

PROTOTYPES: ENABLE

unsigned int
void *
xs_distance (arraySource, arrayTarget, maxDistance)
AV * arraySource
AV * arrayTarget
SV * maxDistance
CODE:
{
unsigned int i,j,edits,answer;
unsigned int lenSource = av_len(arraySource)+1;
unsigned int lenTarget = av_len(arrayTarget)+1;
unsigned int md = (unsigned int)SvUV(maxDistance);

if (lenSource > 0 && lenTarget > 0) {
unsigned int * s;
unsigned int * t;
unsigned int * v0;
unsigned int * v1;
SV* elem;

/* this block changes md such that it is always set to the */
/* max possible distance if it is set to unlimited (0) */
md = (md == 0) ? MAX(lenSource,lenTarget) : md;

/* if string length difference > max_distance then return undef */
if ((MAX(lenSource , lenTarget) - MIN(lenSource, lenTarget)) > md)
XSRETURN_UNDEF;

Newxz(s, (lenSource + 1), unsigned int);
Newxz(t, (lenTarget + 1), unsigned int);
Newxz(v0, (lenTarget + 1), unsigned int);
Newxz(v1, (lenTarget + 1), unsigned int);

/* init first distance row with worst-case distance values */
for (i=0; i < (lenTarget + 1); i++) {
v0[i] = i;
}
SV * maxDistance
PPCODE:
{
unsigned int i,j,edits,answer;
unsigned int lenSource = av_len(arraySource)+1;
unsigned int lenTarget = av_len(arrayTarget)+1;
unsigned int md = SvUV(maxDistance);

if (lenSource > 0 && lenTarget > 0) {
unsigned int * s;
unsigned int * t;
unsigned int * v0;
unsigned int * v1;
unsigned int diff = MAX(lenSource , lenTarget) - MIN(lenSource, lenTarget);
SV* elem;

/* this block changes md such that it is always set to the */
/* max possible distance if it is set to unlimited (0) */
md = (md == 0) ? MAX(lenSource,lenTarget) : md;
/* if string length difference > max_distance then return undef */
if (diff > md)
XSRETURN_UNDEF;

Newx(s, (lenSource + 1), unsigned int); // source
Newx(t, (lenTarget + 1), unsigned int); // target
Newx(v0, (lenTarget + 1), unsigned int); // vector 0
Newx(v1, (lenTarget + 1), unsigned int); // vector 1

/* init first distance row with worst-case distance values */
for (i=0; i < (lenTarget + 1); i++) {
v0[i] = i;
}

for (i=0; i < lenSource; i++) {
elem = sv_2mortal(av_shift(arraySource));
s[i] = (unsigned int)SvUV((SV *)elem);
for (i=0; i < lenSource; i++) {
elem = sv_2mortal(av_shift(arraySource));
s[i] = SvUV((SV *)elem);

v1[0] = i + 1;
v1[0] = i + 1;

for (j = 0; j < lenTarget; j++) {
if(i == 0) {
elem = sv_2mortal(av_shift(arrayTarget));
t[j] = (unsigned int)SvUV((SV *)elem);
}
for (j = 0; j < lenTarget; j++) {
if(i == 0) {
elem = sv_2mortal(av_shift(arrayTarget));
t[j] = SvUV((SV *)elem);
}

edits = (s[i] == t[j]) ? 0 : 1;
v1[j + 1] = MIN(MIN(v1[j] + 1, v0[j + 1] + 1), v0[j] + edits);
answer = v1[j + 1];
v1[j + 1] = MIN(MIN(v1[j] + 1, v0[j + 1] + 1), (v0[j] + ((s[i] == t[j]) ? 0 : 1)));

/* max distance exceeded */
if( answer > md )
XSRETURN_UNDEF;
}
/* max distance exceeded */
if( v1[0] == j && (v1[v1[0]]+1) > md )
XSRETURN_UNDEF;
}

/* copy current row to first row */
/* copy v1 to v0 */
if( i < lenSource || v1[lenTarget] > md ) {
for (j = 0; j < (lenTarget + 1); j++) {
v0[j] = v1[j];
}
}

Safefree(s);
Safefree(t);
Safefree(v0);
Safefree(v1);
RETVAL = answer;
}
else {
/* handle a blank string */
RETVAL = (lenSource>lenTarget) ? lenSource : lenTarget;
if( md != 0 && RETVAL > md )
XSRETURN_UNDEF;
}

answer = v1[lenTarget];
Safefree(s);
Safefree(t);
Safefree(v0);
Safefree(v1);
}
OUTPUT:
RETVAL
else {
/* handle a blank string */
answer = MAX(lenSource, lenTarget);
if( md != 0 && answer > md )
XSRETURN_UNDEF;
}

/* TODO: return list of distances if passed a list */
XPUSHs(sv_2mortal(newSViv(answer)));
} /* PPCODE */
2 changes: 1 addition & 1 deletion lib/Text/Levenshtein/XS.pm
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use warnings FATAL => 'all';
require Exporter;

@Text::Levenshtein::XS::ISA = qw/Exporter/;
$Text::Levenshtein::XS::VERSION = qw/0.41/;
$Text::Levenshtein::XS::VERSION = qw/0.41_01/;
@Text::Levenshtein::XS::EXPORT_OK = qw/distance/;

eval {
Expand Down
25 changes: 15 additions & 10 deletions t/02_distance.t
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,19 @@ use Test::More;
use Text::Levenshtein::XS qw/distance/;

subtest 'distance' => sub {
is( distance('four','for'), 1, 'test distance insertion');
is( distance('four','four'), 0, 'test distance matching');
is( distance('four','fourth'), 2, 'test distance deletion');
is( distance('four','fuor'), 2, 'test distance (no) transposition');
is( distance('four','fxxr'), 2, 'test distance substitution');
is( distance('four','FOuR'), 3, 'test distance case');
is( distance('four',''), 4, 'test distance target empty');
is( distance('','four'), 4, 'test distance source empty');
is( distance('',''), 0, 'test distance source and target empty');
is( distance('111','11'), 1, 'test distance numbers');
is( distance('four','for'), 1, 'test distance insertion');
is( distance('four','four'), 0, ' test distance matching');
is( distance('four','fourth'), 2, 'test distance deletion');
is( distance('four','fuor'), 2, 'test distance (no) transposition');
is( distance('four','fxxr'), 2, 'test distance substitution');
is( distance('four','FOuR'), 3, 'test distance case');
is( distance('four',''), 4, 'test distance target empty');
is( distance('','four'), 4, 'test distance source empty');
is( distance('',''), 0, 'test distance source and target empty');
is( distance('111','11'), 1, 'test distance numbers');
is( distance('xxx' x 10000,'xa' x 500), 29500, 'test larger source and target');
is( distance('abcdxx','xx'), 4, 'test distance');
is( distance('xx','abcdxx'), 4, 'test distance');
};

subtest 'distance using a max distance' => sub {
Expand All @@ -37,6 +40,8 @@ subtest 'distance using a max distance' => sub {
is( distance('abcdxx','xx', 4), 4, 'test maxdistance == length difference between source and target; longer source');
is( distance('xx','abcdxx', 1), undef, 'test distance > maxdistance with length difference > max distance; longer target');
is( distance('xx','abcdxx', 4), 4, 'test maxdistance == length difference between source and target; longer target');
is( distance('x','123456789x', 8), undef, 'test maxdistance == length difference between source and target; clear example.');
is( distance('x','123456789x', 9), 9, 'test maxdistance == length difference between source and target; clear example.');
};

subtest 'utf8' => sub {
Expand Down

0 comments on commit d5dd78d

Please sign in to comment.