Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Re-implement MedianIterator to use a proper sequence of medians #88

Merged
merged 2 commits into from Feb 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
96 changes: 51 additions & 45 deletions src/main/java/org/passay/dictionary/AbstractWordList.java
Expand Up @@ -3,6 +3,7 @@

import java.util.Comparator;
import java.util.Iterator;
import java.util.NoSuchElementException;

/**
* Provides common operations implementations for word lists.
Expand Down Expand Up @@ -72,9 +73,14 @@ protected void checkIsString(final Object o)
private abstract class AbstractWordListIterator implements Iterator<String>
{

/** Index of next word in list. */
/** Index of next word in the iterator sequence. */
protected int index;

@Override
public boolean hasNext()
{
return index < size();
}

@Override
public void remove()
Expand All @@ -92,73 +98,73 @@ public void remove()
*/
private class SequentialIterator extends AbstractWordListIterator
{


@Override
public boolean hasNext()
{
return index < size();
}


@Override
public String next()
{
if (!hasNext()) {
throw new NoSuchElementException();
}
return get(index++);
}
}


/**
* Iterator that iterates over a word list from the median outward to either end. In particular, for a word list of N
* elements whose median index is M, and for each i such that M-i >= 0 and M+i < N, the M-i element is visited before
* the M+i element.
* Iterator that iterates over a word list by following a sequence of medians.
* This enables the creation of a well-balanced search tree from a sorted word list.
* <p>
* The sequence of median indices starts with the global median,
* followed by the median of the left half, the median of the right half,
* the median of the left half of the left half, etc. (recursively).
*
* @author Middleware Services
* @author Amichai Rothman
* @author Ronen Zilberman
*/
private class MedianIterator extends AbstractWordListIterator
{

/** Index of median element in given list. */
private final int median = size() / 2;

/** Indicates direction of next item. */
private int sign;


@Override
public boolean hasNext()
/**
* Returns the i-th element in the sequence of median indices of the given size.
*
* @param i the index within the median sequence of the element to return
* @param size the size of the sequence
* @return the i-th element in the median indices sequence
*/
int toMedianIndex(final int i, final int size)
{
final int n = size();
final boolean result;
if (sign > 0) {
result = median + index < n;
} else if (sign < 0) {
result = median - index >= 0;
// we use long multiplication to avoid int overflow
// (good for all int inputs, beyond that double arithmetic is required)
final int powerOfTwo = Integer.highestOneBit(i + 1);
final long remainder = (i + 1) - powerOfTwo;
final int leftovers = size - powerOfTwo;
// all power of two levels that we can fill up completely go to the first case,
// the leftovers in the last incomplete power of two level go to the second case
// (also the special case of index 0 of the leftovers goes to the first
// case just so it'll return 0 and not fail due to an edge-case of rounding)
if (leftovers >= powerOfTwo || remainder == 0) {
// find the correct fraction (power of two denominator
// and indexed odd-numbered numerator), and multiply by size
// to get our median index
return (int) (size * (2 * remainder + 1) / (2 * powerOfTwo));
} else {
result = n > 0;
// find the correct fraction (denominator is the number of leftover
// items in the incomplete last power of two level, and indexed
// numerator), and multiply by size to get our median index.
// note that the leftovers (indices that were not returned by
// any of the smaller power of two levels) are evenly spaced,
// so they can be trivially calculated, with the added -1 for
// rounding them down properly)
return (int) ((size * remainder - 1) / leftovers);
}
return result;
}


@Override
public String next()
{
final String next;
if (sign > 0) {
next = get(median + index);
sign = -1;
index++;
} else if (sign < 0) {
next = get(median - index);
sign = 1;
} else {
next = get(median);
sign = -1;
index = 1;
if (!hasNext()) {
throw new NoSuchElementException();
}
return next;
return get(toMedianIndex(index++, size()));
}
}
}
Expand Up @@ -237,7 +237,7 @@ public static void main(final String[] args)
"<options> <operation> \\");
System.out.println("");
System.out.println("where <options> includes:");
System.out.println(" -m (Insert dictionary using it's median) \\");
System.out.println(" -m (Insert dictionary using its medians) \\");
System.out.println(" -ci (Make search case-insensitive) \\");
System.out.println("");
System.out.println("where <operation> includes:");
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/passay/dictionary/WordList.java
Expand Up @@ -41,7 +41,7 @@ public interface WordList


/**
* Returns an iterator to traverse this word list from the median.
* Returns an iterator to traverse this word list by following a recursive sequence of medians.
*
* @return iterator for this word list
*/
Expand Down
95 changes: 80 additions & 15 deletions src/test/java/org/passay/dictionary/AbstractWordListTest.java
Expand Up @@ -4,8 +4,14 @@
import java.io.IOException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.IntStream;
import org.testng.AssertJUnit;
import org.testng.annotations.AfterClass;
import org.testng.annotations.DataProvider;
Expand Down Expand Up @@ -198,25 +204,84 @@ public void iterator(final T list) throws Exception
* Test for {@link WordList#medianIterator()}.
*
* @param list Word list to test.
*
* @throws Exception On test failure.
*/
@Test(groups = {"wltest"}, dataProvider = "shortWordLists")
public void medianIterator(final T list) throws Exception
public void medianIterator(final T list)
{
final Iterator<String> i = list.medianIterator();
int index = list.size() / 2;
int count = 0;
while (i.hasNext()) {
final String s = i.next();
AssertJUnit.assertEquals(list.get(index), s);
count++;
if (count % 2 == 0) {
index = index + count;
} else {
index = index - count;
}
final Iterator<String> iterator = list.medianIterator();
final Set<String> found = new HashSet<>();
final int size = list.size();
final double[] fractions = {1 / 2d, 1 / 4d, 3 / 4d, 1 / 8d, 3 / 8d, 5 / 8d, 7 / 8d, 1 / 16d};
for (int i = 0; i < fractions.length; i++) {
final int index = (int) (size * fractions[i]);
AssertJUnit.assertTrue(iterator.hasNext());
AssertJUnit.assertEquals(list.get(index), iterator.next());
found.add(list.get(index));
}
while (iterator.hasNext()) {
found.add(iterator.next());
}
AssertJUnit.assertEquals(size, found.size());
}


/**
* Checks that an iterator is correct, i.e. each
* element is returned exactly once (regardless of order).
*
* @param iterator an iterator supplying unique elements
* @param size the expected number of elements
*/
private void assertIteratorCorrect(final Iterator<String> iterator, final int size)
{
final Map<String, Integer> found = new HashMap<>(size);
found.clear();
while (iterator.hasNext()) {
final String next = iterator.next();
AssertJUnit.assertFalse("got duplicate: " + next + " (size " + size + ")", found.containsKey(next));
found.put(next, 1);
}
AssertJUnit.assertEquals("missing items (size " + size + ")", size, found.size());
}

/**
* Test for {@link WordList#medianIterator()} of various edge-cases and sizes.
*/
@Test(groups = {"wltest"})
public void medianIterator()
{
// ensure median order in edge cases of 0-4 elements.
Iterator<String> iterator;
iterator = new ArrayWordList(new String[] {}).medianIterator();
AssertJUnit.assertFalse(iterator.hasNext());
iterator = new ArrayWordList(new String[] {"0"}).medianIterator();
AssertJUnit.assertTrue(iterator.hasNext());
AssertJUnit.assertEquals("0", iterator.next());
AssertJUnit.assertFalse(iterator.hasNext());
iterator = new ArrayWordList(new String[] {"0", "1"}).medianIterator();
AssertJUnit.assertTrue(iterator.hasNext());
AssertJUnit.assertEquals("1", iterator.next());
AssertJUnit.assertEquals("0", iterator.next());
AssertJUnit.assertFalse(iterator.hasNext());
iterator = new ArrayWordList(new String[] {"0", "1", "2"}).medianIterator();
AssertJUnit.assertEquals("1", iterator.next());
AssertJUnit.assertEquals("0", iterator.next());
AssertJUnit.assertEquals("2", iterator.next());
AssertJUnit.assertFalse(iterator.hasNext());
iterator = new ArrayWordList(new String[] {"0", "1", "2", "3"}).medianIterator();
AssertJUnit.assertEquals("2", iterator.next());
AssertJUnit.assertEquals("1", iterator.next());
AssertJUnit.assertEquals("3", iterator.next());
AssertJUnit.assertEquals("0", iterator.next());
AssertJUnit.assertFalse(iterator.hasNext());

// ensure correctness (each item returned exactly once) for a range of sizes
final String[] data = IntStream.range(0, 1000000).boxed().map(String::valueOf).sorted().toArray(String[]::new);
for (int i = 0; i < 4200; i++) {
assertIteratorCorrect(new ArrayWordList(Arrays.copyOfRange(data, 0, i)).medianIterator(), i);
}
// ensure correctness and no integer overflow with a large size
assertIteratorCorrect(new ArrayWordList(Arrays.copyOfRange(data, 0, 1000000)).medianIterator(), 1000000);
}


Expand Down