diff --git a/README.md b/README.md index 4d2f113..d3eeb4f 100644 --- a/README.md +++ b/README.md @@ -692,7 +692,7 @@ Events are processed at over 220K/second except for: - wildcard matches, which are processed at over 170K/second. - anything-but matches, which are processed at over 150K/second. - numeric matches, which are processed at over 120K/second. - - complex array matches, which are processed at over 2.5K/second. + - complex array matches, which are processed at over 5K/second. ### Suggestions for better performance diff --git a/pom.xml b/pom.xml index 017f3bf..99de521 100644 --- a/pom.xml +++ b/pom.xml @@ -32,6 +32,7 @@ 1.8 2.14.0 + 8.5.9 3.0.2 4.13.2 3.10.1 @@ -98,6 +99,11 @@ test + + it.unimi.dsi + fastutil-core + ${fastutil.version} + @@ -147,7 +153,6 @@ ${checkstyle.plugin.version} ${basedir}/config/checkstyle/checkstyle.xml - ${project.build.sourceEncoding} true false false diff --git a/src/main/software/amazon/event/ruler/ArrayMembership.java b/src/main/software/amazon/event/ruler/ArrayMembership.java index d8158a9..2487a2f 100644 --- a/src/main/software/amazon/event/ruler/ArrayMembership.java +++ b/src/main/software/amazon/event/ruler/ArrayMembership.java @@ -1,28 +1,39 @@ package software.amazon.event.ruler; +import it.unimi.dsi.fastutil.ints.Int2IntAVLTreeMap; +import it.unimi.dsi.fastutil.ints.Int2IntMap; + /** * Represents which JSON arrays within an Event structure a particular field appears within, and at which position. * The arrays are identified using integers. */ class ArrayMembership { - private static final IntIntMap EMPTY = new IntIntMap(); + private static final Int2IntAVLTreeMap EMPTY = createNewIntIntMap(); + public static final int NO_VALUE = -1; // Keys and values may only be positive. - private IntIntMap membership; + private final Int2IntAVLTreeMap membership; ArrayMembership() { - membership = new IntIntMap(); + this.membership = createNewIntIntMap(); } ArrayMembership(final ArrayMembership membership) { if (membership.size() == 0) { this.membership = EMPTY; } else { - this.membership = (IntIntMap) membership.membership.clone(); + this.membership = createNewIntIntMap(); + this.membership.putAll(membership.membership); } } + private static Int2IntAVLTreeMap createNewIntIntMap() { + final Int2IntAVLTreeMap membership = new Int2IntAVLTreeMap(); + membership.defaultReturnValue(NO_VALUE); + return membership; + } + void putMembership(int array, int index) { - if (index == IntIntMap.NO_VALUE) { + if (index == NO_VALUE) { membership.remove(array); } else { membership.put(array, index); @@ -44,8 +55,8 @@ private int size() { // for debugging public String toString() { StringBuilder sb = new StringBuilder(); - for (IntIntMap.Entry entry : membership.entries()) { - sb.append(entry.getKey()).append('[').append(entry.getValue()).append("] "); + for (Int2IntMap.Entry entry : membership.int2IntEntrySet()) { + sb.append(entry.getIntKey()).append('[').append(entry.getIntValue()).append("] "); } return sb.toString(); } @@ -70,12 +81,12 @@ static ArrayMembership checkArrayConsistency(final ArrayMembership membershipSoF // any change will come from memberships in the new field we're investigating. For each of its memberships ArrayMembership newMembership = null; - for (IntIntMap.Entry arrayEntry : fieldMembership.membership.entries()) { - final int array = arrayEntry.getKey(); - final int indexInThisArrayOfThisField = arrayEntry.getValue(); + for (Int2IntMap.Entry arrayEntry : fieldMembership.membership.int2IntEntrySet()) { + final int array = arrayEntry.getIntKey(); + final int indexInThisArrayOfThisField = arrayEntry.getIntValue(); final int indexInThisArrayPreviouslyAppearingInMatch = membershipSoFar.getMembership(array); - if (indexInThisArrayPreviouslyAppearingInMatch == IntIntMap.NO_VALUE) { + if (indexInThisArrayPreviouslyAppearingInMatch == NO_VALUE) { // if there's no membership so far, this is an acceptable delta. Update the new memberships, first // creating it if necessary diff --git a/src/main/software/amazon/event/ruler/IntIntMap.java b/src/main/software/amazon/event/ruler/IntIntMap.java deleted file mode 100644 index 46b054a..0000000 --- a/src/main/software/amazon/event/ruler/IntIntMap.java +++ /dev/null @@ -1,348 +0,0 @@ -package software.amazon.event.ruler; - -import java.util.Arrays; -import java.util.Iterator; -import java.util.NoSuchElementException; - -/** - * A fast primitive int-int map implementation. Keys and values may only be positive. - */ -class IntIntMap implements Cloneable { - - // taken from FastUtil - private static final int INT_PHI = 0x9E3779B9; - - private static long KEY_MASK = 0xFFFFFFFFL; - private static final long EMPTY_CELL = -1 & KEY_MASK; - - public static final int NO_VALUE = -1; - private static final float DEFAULT_LOAD_FACTOR = 0.75f; - - /** - * Capacity of 8, with data type long, translates to an initial {@link #table} of 64 bytes, - * which fits perfectly into the common cache line size. - */ - private static final int DEFAULT_INITIAL_CAPACITY = 8; - - /** - * Holds key-value int pairs. The highest 32 bits hold the int value, and the lowest 32 bits - * hold the int key. Must always have a length that is a power of two so that {@link #mask} can - * be computed correctly. - */ - private long[] table; - - /** - * Load factor, must be between (0 and 1) - */ - private final float loadFactor; - - /** - * We will resize a map once it reaches this size - */ - private int threshold; - - /** - * Current map size - */ - private int size; - - /** - * Mask to calculate the position in the table for a key. - */ - private int mask; - - IntIntMap() { - this(DEFAULT_INITIAL_CAPACITY); - } - - IntIntMap(final int initialCapacity) { - this(initialCapacity, DEFAULT_LOAD_FACTOR); - } - - IntIntMap(final int initialCapacity, final float loadFactor) { - if (loadFactor <= 0 || loadFactor >= 1) { - throw new IllegalArgumentException("loadFactor must be in (0, 1)"); - } - if (initialCapacity <= 0) { - throw new IllegalArgumentException("initialCapacity must be positive"); - } - if (Integer.bitCount(initialCapacity) != 1) { - throw new IllegalArgumentException("initialCapacity must be a power of two"); - } - this.mask = initialCapacity - 1; - this.loadFactor = loadFactor; - this.table = makeTable(initialCapacity); - this.threshold = (int) (initialCapacity * loadFactor); - } - - /** - * Gets the value for {@code key}. - * - * @param key - * the non-negative key - * @return the value present at {@code key}, or {@link #NO_VALUE} if none is present. - */ - int get(final int key) { - int idx = getStartIndex(key); - do { - long cell = table[idx]; - if (cell == EMPTY_CELL) { - // end of the chain, key does not exist - return NO_VALUE; - } - if (((int) (cell & KEY_MASK)) == key) { - // found the key - return (int) (cell >> 32); - } - // continue walking the chain - idx = getNextIndex(idx); - } while (true); - } - - /** - * Puts {@code value} in {@code key}. {@code key} is restricted to positive integers to avoid an - * unresolvable collision with {@link #EMPTY_CELL}, while {@code value} is restricted to - * positive integers to avoid an unresolvable collision with {@link #NO_VALUE}. - * - * @param key - * the non-negative key - * @param value - * the non-negative value - * @return the value that was previously set for {@code key}, or {@link #NO_VALUE} if none was - * present. - * @throws IllegalArgumentException - * if {@code key} is negative - */ - int put(final int key, final int value) { - if (key < 0) { - throw new IllegalArgumentException("key cannot be negative"); - } - if (value < 0) { - throw new IllegalArgumentException("value cannot be negative"); - } - long cellToPut = (((long) key) & KEY_MASK) | (((long) value) << 32); - int idx = getStartIndex(key); - do { - long cell = table[idx]; - if (cell == EMPTY_CELL) { - // found an empty cell - table[idx] = cellToPut; - if (size >= threshold) { - rehash(table.length * 2); - // 'size' is set inside rehash() - } else { - size++; - } - return NO_VALUE; - } - if (((int) (cell & KEY_MASK)) == key) { - // found a non-empty cell with a key matching the one we're writing, so overwrite it - table[idx] = cellToPut; - return (int) (cell >> 32); - } - // continue walking the chain - idx = getNextIndex(idx); - } while (true); - } - - /** - * Removes {@code key}. - * - * @param key - * the non-negative key - * @return the removed value, or {@link #NO_VALUE} if none was present. - * @throws IllegalArgumentException - * if {@code key} is negative - */ - int remove(final int key) { - int idx = getStartIndex(key); - do { - long cell = table[idx]; - if (cell == EMPTY_CELL) { - // end of the chain, key does not exist - return NO_VALUE; - } - if (((int) (cell & KEY_MASK)) == key) { - // found the key - size--; - shiftKeys(idx); - return (int) (cell >> 32); - } - // continue walking the chain - idx = getNextIndex(idx); - } while (true); - } - - /** - * Returns the number of key-value mappings in this map. - * - * @return the number of key-value mappings in this map - */ - int size() { - return size; - } - - boolean isEmpty() { - return size == 0; - } - - public Iterable entries() { - return new Iterable () { - - @Override - public Iterator iterator() { - return new EntryIterator(); - } - - }; - } - - @Override - public Object clone() { - IntIntMap result; - try { - result = (IntIntMap) super.clone(); - } catch (CloneNotSupportedException e) { - // this shouldn't happen, since we are Cloneable - throw new InternalError(e); - } - result.table = table.clone(); - return result; - } - - /** - * Shifts entries with the same hash. - */ - private void shiftKeys(final int index) { - int last; - int pos = index; - while (true) { - last = pos; - do { - pos = (pos + 1) & mask; - if (table[pos] == EMPTY_CELL) { - table[last] = EMPTY_CELL; - return; - } - int key = (int) (table[pos] & KEY_MASK); - int keyStartIndex = getStartIndex(key); - if (last < pos) { // did we wrap around? - /* - * (no) if the previous position is after the chain startIndex for key, *or* the - * chain startIndex of the key is after the position we're checking, then the - * position we're checking now cannot be a part of the current chain - */ - if (last >= keyStartIndex || keyStartIndex > pos) { - break; - } - } else { - /* - * (yes) if the previous position is after the chain startIndex for key, *and* - * the chain startIndex of key is after the position we're checking, then the - * position we're checking now cannot be a part of the current chain - */ - if (last >= keyStartIndex && keyStartIndex > pos) { - break; - } - } - } while (true); - table[last] = table[pos]; - } - } - - private void rehash(final int newCapacity) { - threshold = (int) (newCapacity * loadFactor); - mask = newCapacity - 1; - - final int oldCapacity = table.length; - final long[] oldTable = table; - - table = makeTable(newCapacity); - size = 0; - - for (int i = oldCapacity - 1; i >= 0; i--) { - if (oldTable[i] != EMPTY_CELL) { - final int oldKey = (int) (oldTable[i] & KEY_MASK); - final int oldValue = (int) (oldTable[i] >> 32); - put(oldKey, oldValue); - } - } - } - - private static long[] makeTable(final int capacity) { - long[] result = new long[capacity]; - Arrays.fill(result, EMPTY_CELL); - return result; - } - - private int getStartIndex(final int key) { - return phiMix(key) & mask; - } - - private int getNextIndex(final int currentIndex) { - return (currentIndex + 1) & mask; - } - - /** - * Computes hashcode for {@code val}. - * - * @param val - * @return the hashcode for {@code val} - */ - private static int phiMix(final int val) { - final int h = val * INT_PHI; - return h ^ (h >> 16); - } - - static class Entry { - private final int key; - private final int value; - - private Entry(final int key, final int value) { - this.key = key; - this.value = value; - } - - public int getKey() { - return key; - } - - public int getValue() { - return value; - } - } - - private class EntryIterator implements Iterator { - - private static final int NO_NEXT_INDEX = -1; - - private int nextIndex = findNextIndex(0); - - @Override - public boolean hasNext() { - return nextIndex != NO_NEXT_INDEX; - } - - @Override - public Entry next() { - if (nextIndex == NO_NEXT_INDEX) { - throw new NoSuchElementException(); - } - Entry entry = new Entry((int) (table[nextIndex] & KEY_MASK), (int) (table[nextIndex] >> 32)); - nextIndex = findNextIndex(nextIndex + 1); - return entry; - } - - private int findNextIndex(int fromIndex) { - while (fromIndex < table.length) { - if (table[fromIndex] != EMPTY_CELL) { - return fromIndex; - } - fromIndex++; - } - return NO_NEXT_INDEX; - } - - } - -} diff --git a/src/test/software/amazon/event/ruler/EventTest.java b/src/test/software/amazon/event/ruler/EventTest.java index 31e6ab9..9fcd4ea 100644 --- a/src/test/software/amazon/event/ruler/EventTest.java +++ b/src/test/software/amazon/event/ruler/EventTest.java @@ -122,8 +122,8 @@ public void WHEN_EventIsConstructed_THEN_HeterogeneousArraysAreHandled() throws "lines.points", "lines.points", "lines.points", "lines.points", "lines.points.pp", "lines.points.pp" }; String[] wantedArrayMemberships = { - "0[0] 2[0] 1[0] ", "0[0] 2[0] 1[0] ", "0[0] 2[1] 1[0] ", "0[0] 2[1] 1[0] ", "0[0] 2[0] 3[2] 1[0] ", - "0[0] 4[0] 2[1] 1[0] " + "0[0] 1[0] 2[0] ", "0[0] 1[0] 2[0] ", "0[0] 1[0] 2[1] ", "0[0] 1[0] 2[1] ", "0[0] 1[0] 2[0] 3[2] ", + "0[0] 1[0] 2[1] 4[0] " }; Machine m = new Machine(); diff --git a/src/test/software/amazon/event/ruler/IntIntMapTest.java b/src/test/software/amazon/event/ruler/IntIntMapTest.java deleted file mode 100644 index 59091c6..0000000 --- a/src/test/software/amazon/event/ruler/IntIntMapTest.java +++ /dev/null @@ -1,189 +0,0 @@ -package software.amazon.event.ruler; - -import org.junit.Assert; -import org.junit.Test; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.Random; -import java.util.Set; - -public class IntIntMapTest { - - @Test(expected=IllegalArgumentException.class) - public void constructor_disallowsInitialCapacityThatIsNotAPowerOfTwo() { - new IntIntMap(3); - } - - @Test - public void put_replacesOriginalValue() { - IntIntMap map = new IntIntMap(); - Assert.assertEquals(IntIntMap.NO_VALUE, map.put(10, 100)); - Assert.assertEquals(1, map.size()); - Assert.assertEquals(100, map.put(10, 200)); - Assert.assertEquals(1, map.size()); - } - - @Test - public void put_disallowsNegativeKeys() { - IntIntMap map = new IntIntMap(); - try { - map.put(-1234, 5678); - Assert.fail("expected IllegalArgumentException"); - } catch (IllegalArgumentException ex) { - } - } - - @Test - public void put_disallowsNegativeValues() { - IntIntMap map = new IntIntMap(); - try { - map.put(1234, -5678); - Assert.fail("expected IllegalArgumentException"); - } catch (IllegalArgumentException ex) { - } - } - - @Test - public void get_canRetrieveValues() { - IntIntMap map = new IntIntMap(); - for (int key = 0; key < 1000; key++) { - Assert.assertEquals(IntIntMap.NO_VALUE, map.put(key, key * 2)); - } - for (int key = 0; key < 1000; key++) { - Assert.assertEquals(key * 2, map.get(key)); - } - - Assert.assertEquals(IntIntMap.NO_VALUE, map.get(1001)); - } - - @Test - public void remove_canRemoveValues() { - IntIntMap map = new IntIntMap(); - Assert.assertEquals(IntIntMap.NO_VALUE, map.remove(0)); - map.put(1234, 5678); - Assert.assertEquals(5678, map.remove(1234)); - Assert.assertTrue(map.isEmpty()); - } - - @Test - public void iterator_returnsAllValues() { - IntIntMap map = new IntIntMap(); - Map baseline = new HashMap<>(); - for (int key = 0; key < 1000; key++) { - map.put(key, key * 2); - baseline.put(key, key * 2); - } - - List entries = new ArrayList<>(); - map.entries().iterator().forEachRemaining(entries::add); - - Assert.assertEquals(1000, entries.size()); - for (IntIntMap.Entry entry : entries) { - Assert.assertEquals(map.get(entry.getKey()), entry.getValue()); - Assert.assertEquals(baseline.get(entry.getKey()).intValue(), entry.getValue()); - } - } - - @Test - public void iterator_returnsEmptyIteratorForEmptyMap() { - IntIntMap map = new IntIntMap(); - Iterator iter = map.entries().iterator(); - Assert.assertFalse(iter.hasNext()); - } - - @Test - public void iterator_throwsNoSuchElementExceptionWhenNextIsCalledWithNoMoreElements() { - IntIntMap map = new IntIntMap(); - map.put(1, 100); - Iterator iter = map.entries().iterator(); - Assert.assertTrue(iter.hasNext()); - IntIntMap.Entry entry = iter.next(); - Assert.assertEquals(1, entry.getKey()); - Assert.assertEquals(100, entry.getValue()); - - try { - iter.next(); - Assert.fail("expected NoSuchElementException"); - } catch (NoSuchElementException ex) { - // expected - } - } - - @Test - public void clone_createsNewBackingTable() { - IntIntMap map = new IntIntMap(); - map.put(123, 456); - - IntIntMap cloneMap = (IntIntMap) map.clone(); - cloneMap.put(123, 789); - - Assert.assertEquals(456, map.get(123)); - Assert.assertEquals(789, cloneMap.get(123)); - } - - @Test - public void stressTest() { - // deterministic seed to prevent unit test flakiness - long seed = 1; - Random random = new Random(seed); - - // set a high load factor to increase the chances that we'll see lots of hash collisions - float loadFactor = 0.99f; - IntIntMap map = new IntIntMap(2, loadFactor); - - Map baseline = new HashMap<>(); - - for (int trial = 0; trial < 50; trial++) { - for (int i = 0; i < 100_000; i++) { - int key = random.nextInt(Integer.MAX_VALUE); - int value = random.nextInt(Integer.MAX_VALUE); - - int mapOut = map.put(key, value); - Integer baselineOut = baseline.put(key, value); - - Assert.assertEquals(baselineOut == null ? IntIntMap.NO_VALUE : baselineOut.intValue(), mapOut); - Assert.assertEquals(baseline.size(), map.size()); - } - - // Now remove half, randomly - Set baselineKeys = new HashSet<>(baseline.keySet()); - for (Integer key : baselineKeys) { - if (random.nextBoolean()) { - Assert.assertEquals(baseline.remove(key).intValue(), map.remove(key)); - } - } - Assert.assertEquals(baseline.size(), map.size()); - } - } - - @Test - public void stressTest_rehash() { - // deterministic seed to prevent unit test flakiness - long seed = 1; - Random random = new Random(seed); - - for (int trial = 0; trial < 100_000; trial++) { - // start the map off with the smallest possible initial capacity - IntIntMap map = new IntIntMap(1); - Map baseline = new HashMap<>(); - - for (int i = 0 ; i < 16; i++) { - int key = random.nextInt(Integer.MAX_VALUE); - int value = random.nextInt(Integer.MAX_VALUE); - map.put(key, value); - baseline.put(key, value); - } - - for (Integer key : baseline.keySet()) { - Assert.assertEquals(baseline.get(key).intValue(), map.get(key)); - } - } - } - -}