Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Invalid hex chars #48

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -407,38 +407,40 @@ public static long tryToParseEightHexDigitsUtf8(long chunk) {
// The following code is based on the technique presented in the paper
// by Leslie Lamport.

// The predicates are true if the highest significant bit (hsb, 0x80) of a byte is set.

// Create a predicate for all bytes which are less than '0'
long lt_0 = chunk - 0x30_30_30_30_30_30_30_30L;
lt_0 &= 0x80_80_80_80_80_80_80_80L;

// Create a predicate for all bytes which are greater than '9'
long gt_9 = chunk + (0x39_39_39_39_39_39_39_39L ^ 0x7f_7f_7f_7f_7f_7f_7f_7fL);
gt_9 &= 0x80_80_80_80_80_80_80_80L;

// We can convert upper case characters to lower case by setting the 0x20 bit.
// (This does not have an impact on decimal digits, which is very handy!).
// Subtract character '0' (0x30) from each of the eight characters
long vec = (chunk | 0x20_20_20_20_20_20_20_20L) - 0x30_30_30_30_30_30_30_30L;

// Create a predicate for all bytes which are greater than '9'-'0' (0x09).
// The predicate is true if the hsb of a byte is set: (predicate & 0x80) != 0.
long gt_09 = vec + (0x09_09_09_09_09_09_09_09L ^ 0x7f_7f_7f_7f_7f_7f_7f_7fL);
gt_09 &= 0x80_80_80_80_80_80_80_80L;

// Create a predicate for all bytes which are greater or equal 'a'-'0' (0x30).
// The predicate is true if the hsb of a byte is set.
long ge_30 = vec + (0x30303030_30303030L ^ 0x7f_7f_7f_7f_7f_7f_7f_7fL);
ge_30 &= 0x80_80_80_80_80_80_80_80L;

// Create a predicate for all bytes which are smaller equal than 'f'-'0' (0x37).
long le_37 = 0x37_37_37_37_37_37_37_37L + (vec ^ 0x7f_7f_7f_7f_7f_7f_7f_7fL);
// we don't need to 'and' with 0x80…L here, because we 'and' this with ge_30 anyway.
//le_37 &= 0x80_80_80_80_80_80_80_80L;
// Create a predicate for all bytes which are greater than 0x30, i.e. greater than or equal to 'a'-'0' (0x31).
long ge_a = vec + (0x30_30_30_30_30_30_30_30L ^ 0x7f_7f_7f_7f_7f_7f_7f_7fL);
ge_a &= 0x80_80_80_80_80_80_80_80L;

// Create a predicate for all bytes which are less than 0x37, i.e. less than or equal to 'f'-'0' (0x36).
long le_f = vec - 0x37_37_37_37_37_37_37_37L;
// Strictly, this 'and' with 0x80…L is redundant, because le_f is only ever 'and'-ed with the already masked ge_a.
le_f &= 0x80_80_80_80_80_80_80_80L;

// If a character is greater than '9' then it must be greater equal 'a'
// and smaller 'f'.
if (gt_09 != (ge_30 & le_37)) {
// If a character is less than '0' or greater than '9', then it must be greater than or equal to 'a' and less than or equal to 'f'.
if (((lt_0 | gt_9) != (ge_a & le_f))) {
return -1;
}

// Expand the predicate to a byte mask
long gt_09mask = (gt_09 >>> 7) * 0xffL;
long gt_9mask = (gt_9 >>> 7) * 0xffL;

// Subtract 'a'-'0'+10 (0x27) from all bytes that are greater than 0x09.
long v = vec & ~gt_09mask | vec - (0x27272727_27272727L & gt_09mask);
long v = vec & ~gt_9mask | vec - (0x27272727_27272727L & gt_9mask);

// Compact all nibbles
return Long.compress(v, 0x0f0f0f0f_0f0f0f0fL);// since Java 19
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,18 @@ public static int tryToParseEightDigits(CharSequence str, int offset) {
return FastDoubleVector.tryToParseEightDigitsUtf16(first, second);
}

/**
 * Reads the eight characters {@code str.charAt(offset)} through
 * {@code str.charAt(offset + 7)}, packs them into two {@code long} values
 * and delegates to
 * {@code FastDoubleVector.tryToParseEightHexDigitsUtf16(long, long)}.
 * <p>
 * Each {@code long} holds four 16-bit characters; the character with the
 * lowest index ends up in the highest 16 bits.
 *
 * @param str    the character sequence to read from
 * @param offset the index of the first of the eight characters
 * @return whatever {@code tryToParseEightHexDigitsUtf16} returns for the
 * packed characters (NOTE(review): presumably the parsed value, or a
 * negative number for non-hex input - confirm against that method)
 */
public static long tryToParseEightHexDigits(CharSequence str, int offset) {
    // Characters offset .. offset+3: shift the accumulator left by one
    // character and append the next one in the low 16 bits.
    long first = 0L;
    for (int i = 0; i < 4; i++) {
        first = (first << 16) | str.charAt(offset + i);
    }

    // Characters offset+4 .. offset+7, packed the same way.
    long second = 0L;
    for (int i = 4; i < 8; i++) {
        second = (second << 16) | str.charAt(offset + i);
    }

    return FastDoubleVector.tryToParseEightHexDigitsUtf16(first, second);
}

/**
* Tries to parse eight decimal digits from a char array using the
* Java Vector API.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import org.junit.jupiter.api.DynamicNode;
import org.junit.jupiter.api.TestFactory;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

Expand Down Expand Up @@ -61,7 +62,23 @@ public List<DynamicNode> dynamicTestsLegalEightHexDigitsLiterals() {
);
}

/**
 * Creates one dynamic test for every value in {@code 0..255} that is not
 * an ASCII hex digit ({@code 0-9}, {@code A-F}, {@code a-f}).
 * <p>
 * Each test fills an eight-byte array with that value and calls
 * {@code testHex(byte[], int, long)} with offset {@code 0} and an expected
 * result of {@code -1}.
 *
 * @return the list of generated dynamic tests
 */
@TestFactory
public List<DynamicNode> dynamicTestsAllIllegalEightHexDigitsLiterals() {
    List<DynamicNode> nodes = new ArrayList<>();
    for (int code = 0; code < 256; code++) {
        final char ch = (char) code;

        boolean decimalDigit = ch >= '0' && ch <= '9';
        boolean upperHexLetter = ch >= 'A' && ch <= 'F';
        boolean lowerHexLetter = ch >= 'a' && ch <= 'f';
        if (decimalDigit || upperHexLetter || lowerHexLetter) {
            // Legal hex digits are not part of this test factory.
            continue;
        }

        // Eight copies of the same illegal byte.
        byte[] input = new byte[8];
        Arrays.fill(input, (byte) ch);
        nodes.add(dynamicTest(code + " " + ch, () -> testHex(input, 0, -1)));
    }
    return nodes;
}

abstract void testDec(String s, int offset, int expected);

abstract void testHex(String s, int offset, long expected);

abstract void testHex(byte[] b, int offset, long expected);
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,12 @@
public class EarlyAccessEightDigitsVectorTest extends AbstractEightDigitsTest {
@Override
void testDec(String s, int offset, int expected) {
int actual = FastDoubleVector.tryToParseEightDigits(s, offset);
assertEquals(expected, actual);

char[] chars = s.toCharArray();

int actual = FastDoubleVector.tryToParseEightDigitsUtf16(chars, offset);
actual = FastDoubleVector.tryToParseEightDigitsUtf16(chars, offset);
assertEquals(expected, actual);


Expand Down Expand Up @@ -59,36 +62,47 @@ void testDec(String s, int offset, int expected) {

@Override
void testHex(String s, int offset, long expected) {
long actual = FastDoubleVector.tryToParseEightHexDigits(s, offset);
if (expected < 0) {
assertTrue(actual < 0);
} else {
assertEquals(expected, actual);
}

char[] chars = s.toCharArray();
long actual = FastDoubleVector.tryToParseEightHexDigitsUtf16(chars, offset);
actual = FastDoubleVector.tryToParseEightHexDigitsUtf16(chars, offset);
if (expected < 0) {
assertTrue(actual < 0);
} else {
assertEquals(expected, actual);
}

long first = (long) chars[offset + 0]
| (long) chars[offset + 1] << 16
| (long) chars[offset + 2] << 32
| (long) chars[offset + 3] << 48;
long first = (long) chars[offset + 0] << 48
| (long) chars[offset + 1] << 32
| (long) chars[offset + 2] << 16
| (long) chars[offset + 3];

long second = (long) chars[offset + 4]
| (long) chars[offset + 5] << 16
| (long) chars[offset + 6] << 32
| (long) chars[offset + 7] << 48;
long second = (long) chars[offset + 4] << 48
| (long) chars[offset + 5] << 32
| (long) chars[offset + 6] << 16
| (long) chars[offset + 7];
actual = FastDoubleVector.tryToParseEightHexDigitsUtf16(first, second);
if (expected < 0) {
assertTrue(actual < 0);
} else {
assertEquals(expected, actual);
}

actual = FastDoubleVector.tryToParseEightHexDigitsUtf8(s.getBytes(StandardCharsets.UTF_8), offset);
testHex(s.getBytes(StandardCharsets.UTF_8), offset, expected);
}

/**
 * Runs {@code FastDoubleVector.tryToParseEightHexDigitsUtf8(b, offset)}
 * and checks the result.
 * <p>
 * A negative {@code expected} only requires the result to be negative;
 * otherwise the result must equal {@code expected} exactly.
 *
 * @param b        the bytes to parse
 * @param offset   the index of the first byte to parse
 * @param expected the expected value, or any negative number if parsing
 *                 is expected to fail
 */
@Override
void testHex(byte[] b, int offset, long expected) {
    final long result = FastDoubleVector.tryToParseEightHexDigitsUtf8(b, offset);
    if (expected >= 0) {
        assertEquals(expected, result);
        return;
    }
    // Failure case: any negative result is accepted.
    assertTrue(result < 0);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,16 @@ public void testHex(String s, int offset, long expected) {
assertEquals(expected, actual);
}

actual = FastDoubleSwar.tryToParseEightHexDigits(s.getBytes(StandardCharsets.UTF_8), offset);
testHex(s.getBytes(StandardCharsets.UTF_8), offset, expected);
}

/**
 * Runs {@code FastDoubleSwar.tryToParseEightHexDigits(b, offset)} and
 * checks the result.
 * <p>
 * A negative {@code expected} only requires the result to be negative;
 * otherwise the result must equal {@code expected} exactly.
 *
 * @param b        the bytes to parse
 * @param offset   the index of the first byte to parse
 * @param expected the expected value, or any negative number if parsing
 *                 is expected to fail
 */
@Override
void testHex(byte[] b, int offset, long expected) {
    final long result = FastDoubleSwar.tryToParseEightHexDigits(b, offset);
    if (expected >= 0) {
        assertEquals(expected, result);
        return;
    }
    // Failure case: any negative result is accepted.
    assertTrue(result < 0);
}
}