diff --git a/src/main/java/org/passay/dictionary/AbstractWordList.java b/src/main/java/org/passay/dictionary/AbstractWordList.java index d4c8d567..f3e6afbc 100644 --- a/src/main/java/org/passay/dictionary/AbstractWordList.java +++ b/src/main/java/org/passay/dictionary/AbstractWordList.java @@ -3,6 +3,7 @@ import java.util.Comparator; import java.util.Iterator; +import java.util.NoSuchElementException; /** * Provides common operations implementations for word lists. @@ -72,9 +73,14 @@ protected void checkIsString(final Object o) private abstract class AbstractWordListIterator implements Iterator { - /** Index of next word in list. */ + /** Index of next word in the iterator sequence. */ protected int index; + @Override + public boolean hasNext() + { + return index < size(); + } @Override public void remove() @@ -92,73 +98,73 @@ public void remove() */ private class SequentialIterator extends AbstractWordListIterator { - - - @Override - public boolean hasNext() - { - return index < size(); - } - - @Override public String next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } return get(index++); } } /** - * Iterator that iterates over a word list from the median outward to either end. In particular, for a word list of N - * elements whose median index is M, and for each i such that M-i >= 0 and M+i < N, the M-i element is visited before - * the M+i element. + * Iterator that iterates over a word list by following a sequence of medians. + * This enables the creation of a well-balanced search tree from a sorted word list. + *

+ * The sequence of median indices starts with the global median, + * followed by the median of the left half, the median of the right half, + * the median of the left half of the left half, etc. (recursively). * - * @author Middleware Services + * @author Amichai Rothman + * @author Ronen Zilberman */ private class MedianIterator extends AbstractWordListIterator { - /** Index of median element in given list. */ - private final int median = size() / 2; - - /** Indicates direction of next item. */ - private int sign; - - - @Override - public boolean hasNext() + /** + * Returns the i-th element in the sequence of median indices of the given size. + * + * @param i the index within the median sequence of the element to return + * @param size the size of the sequence + * @return the i-th element in the median indices sequence + */ + int toMedianIndex(final int i, final int size) { - final int n = size(); - final boolean result; - if (sign > 0) { - result = median + index < n; - } else if (sign < 0) { - result = median - index >= 0; + // we use long multiplication to avoid int overflow + // (good for all int inputs, beyond that double arithmetic is required) + final int powerOfTwo = Integer.highestOneBit(i + 1); + final long remainder = (i + 1) - powerOfTwo; + final int leftovers = size - powerOfTwo; + // all power of two levels that we can fill up completely go to the first case, + // the leftovers in the last incomplete power of two level go to the second case + // (also the special case of index 0 of the leftovers goes to the first + // case just so it'll return 0 and not fail due to an edge-case of rounding) + if (leftovers >= powerOfTwo || remainder == 0) { + // find the correct fraction (power of two denominator + // and indexed odd-numbered numerator), and multiply by size + // to get our median index + return (int) (size * (2 * remainder + 1) / (2 * powerOfTwo)); } else { - result = n > 0; + // find the correct fraction (denominator is the number of leftover + // items in the incomplete last power of two level, and indexed + // numerator), and multiply by size to get our median index. + // note that the leftovers (indices that were not returned by + // any of the smaller power of two levels) are evenly spaced, + // so they can be trivially calculated, with the added -1 for + // rounding them down properly) + return (int) ((size * remainder - 1) / leftovers); } - return result; } - @Override public String next() { - final String next; - if (sign > 0) { - next = get(median + index); - sign = -1; - index++; - } else if (sign < 0) { - next = get(median - index); - sign = 1; - } else { - next = get(median); - sign = -1; - index = 1; + if (!hasNext()) { + throw new NoSuchElementException(); } - return next; + return get(toMedianIndex(index++, size())); } } } diff --git a/src/main/java/org/passay/dictionary/TernaryTreeDictionary.java b/src/main/java/org/passay/dictionary/TernaryTreeDictionary.java index b09bbabf..aed6d851 100644 --- a/src/main/java/org/passay/dictionary/TernaryTreeDictionary.java +++ b/src/main/java/org/passay/dictionary/TernaryTreeDictionary.java @@ -237,7 +237,7 @@ public static void main(final String[] args) " \\"); System.out.println(""); System.out.println("where includes:"); - System.out.println(" -m (Insert dictionary using it's median) \\"); + System.out.println(" -m (Insert dictionary using its medians) \\"); System.out.println(" -ci (Make search case-insensitive) \\"); System.out.println(""); System.out.println("where includes:"); diff --git a/src/main/java/org/passay/dictionary/WordList.java b/src/main/java/org/passay/dictionary/WordList.java index ffa580f6..5108f7d9 100644 --- a/src/main/java/org/passay/dictionary/WordList.java +++ b/src/main/java/org/passay/dictionary/WordList.java @@ -41,7 +41,7 @@ public interface WordList /** - * Returns an iterator to traverse this word list from the median. + * Returns an iterator to traverse this word list by following a recursive sequence of medians. * * @return iterator for this word list */ diff --git a/src/test/java/org/passay/dictionary/AbstractWordListTest.java b/src/test/java/org/passay/dictionary/AbstractWordListTest.java index dda3c618..d85efb32 100644 --- a/src/test/java/org/passay/dictionary/AbstractWordListTest.java +++ b/src/test/java/org/passay/dictionary/AbstractWordListTest.java @@ -4,8 +4,14 @@ import java.io.IOException; import java.lang.reflect.Method; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.IntStream; import org.testng.AssertJUnit; import org.testng.annotations.AfterClass; import org.testng.annotations.DataProvider; @@ -198,25 +204,84 @@ public void iterator(final T list) throws Exception * Test for {@link WordList#medianIterator()}. * * @param list Word list to test. - * - * @throws Exception On test failure. */ @Test(groups = {"wltest"}, dataProvider = "shortWordLists") - public void medianIterator(final T list) throws Exception + public void medianIterator(final T list) { - final Iterator i = list.medianIterator(); - int index = list.size() / 2; - int count = 0; - while (i.hasNext()) { - final String s = i.next(); - AssertJUnit.assertEquals(list.get(index), s); - count++; - if (count % 2 == 0) { - index = index + count; - } else { - index = index - count; - } + final Iterator iterator = list.medianIterator(); + final Set found = new HashSet<>(); + final int size = list.size(); + final double[] fractions = {1 / 2d, 1 / 4d, 3 / 4d, 1 / 8d, 3 / 8d, 5 / 8d, 7 / 8d, 1 / 16d}; + for (int i = 0; i < fractions.length; i++) { + final int index = (int) (size * fractions[i]); + AssertJUnit.assertTrue(iterator.hasNext()); + AssertJUnit.assertEquals(list.get(index), iterator.next()); + found.add(list.get(index)); + } + while (iterator.hasNext()) { + found.add(iterator.next()); + } + AssertJUnit.assertEquals(size, found.size()); + } + + + /** + * Checks that an iterator is correct, i.e. each + * element is returned exactly once (regardless of order). + * + * @param iterator an iterator supplying unique elements + * @param size the expected number of elements + */ + private void assertIteratorCorrect(final Iterator iterator, final int size) + { + final Map found = new HashMap<>(size); + found.clear(); + while (iterator.hasNext()) { + final String next = iterator.next(); + AssertJUnit.assertFalse("got duplicate: " + next + " (size " + size + ")", found.containsKey(next)); + found.put(next, 1); + } + AssertJUnit.assertEquals("missing items (size " + size + ")", size, found.size()); + } + + /** + * Test for {@link WordList#medianIterator()} of various edge-cases and sizes. + */ + @Test(groups = {"wltest"}) + public void medianIterator() + { + // ensure median order in edge cases of 0-4 elements. + Iterator iterator; + iterator = new ArrayWordList(new String[] {}).medianIterator(); + AssertJUnit.assertFalse(iterator.hasNext()); + iterator = new ArrayWordList(new String[] {"0"}).medianIterator(); + AssertJUnit.assertTrue(iterator.hasNext()); + AssertJUnit.assertEquals("0", iterator.next()); + AssertJUnit.assertFalse(iterator.hasNext()); + iterator = new ArrayWordList(new String[] {"0", "1"}).medianIterator(); + AssertJUnit.assertTrue(iterator.hasNext()); + AssertJUnit.assertEquals("1", iterator.next()); + AssertJUnit.assertEquals("0", iterator.next()); + AssertJUnit.assertFalse(iterator.hasNext()); + iterator = new ArrayWordList(new String[] {"0", "1", "2"}).medianIterator(); + AssertJUnit.assertEquals("1", iterator.next()); + AssertJUnit.assertEquals("0", iterator.next()); + AssertJUnit.assertEquals("2", iterator.next()); + AssertJUnit.assertFalse(iterator.hasNext()); + iterator = new ArrayWordList(new String[] {"0", "1", "2", "3"}).medianIterator(); + AssertJUnit.assertEquals("2", iterator.next()); + AssertJUnit.assertEquals("1", iterator.next()); + AssertJUnit.assertEquals("3", iterator.next()); + AssertJUnit.assertEquals("0", iterator.next()); + AssertJUnit.assertFalse(iterator.hasNext()); + + // ensure correctness (each item returned exactly once) for a range of sizes + final String[] data = IntStream.range(0, 1000000).boxed().map(String::valueOf).sorted().toArray(String[]::new); + for (int i = 0; i < 4200; i++) { + assertIteratorCorrect(new ArrayWordList(Arrays.copyOfRange(data, 0, i)).medianIterator(), i); } + // ensure correctness and no integer overflow with a large size + assertIteratorCorrect(new ArrayWordList(Arrays.copyOfRange(data, 0, 1000000)).medianIterator(), 1000000); }