Skip to content

Commit 8f87169

Browse files
authored
Sort JSON keys in nested columns to fix comparator inconsistency/stability (apache#18169)
Sort keys in JSON objects to resolve a transitivity bug in the StructuredData comparator. This would otherwise cause ingestion tasks and/or queries to fail intermittently.
1 parent f46124b commit 8f87169

File tree

2 files changed

+22
-2
lines changed

2 files changed

+22
-2
lines changed

processing/src/main/java/org/apache/druid/segment/nested/StructuredData.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121

2222
import com.fasterxml.jackson.annotation.JsonCreator;
2323
import com.fasterxml.jackson.core.JsonProcessingException;
24+
import com.fasterxml.jackson.databind.ObjectWriter;
25+
import com.fasterxml.jackson.databind.SerializationFeature;
2426
import com.google.common.primitives.Longs;
2527
import net.jpountz.xxhash.XXHash64;
2628
import net.jpountz.xxhash.XXHashFactory;
@@ -38,15 +40,21 @@ public class StructuredData implements Comparable<StructuredData>
3840
{
3941
private static final XXHash64 HASH_FUNCTION = XXHashFactory.fastestInstance().hash64();
4042

41-
// seed from the example... but, it doesn't matter what it is as long as its the same every time
43+
// seed from the example... but, it doesn't matter what it is as long as it's the same every time
4244
private static int SEED = 0x9747b28c;
4345

4446
public static final Comparator<StructuredData> COMPARATOR = Comparators.naturalNullsFirst();
4547

48+
/** SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS is required so that hash computations for JSON objects that
49+
* have different key orders but are otherwise equivalent will be consistent. See
50+
* {@link StructuredDataTest#testCompareToWithDifferentJSONOrder()} for an example
51+
*/
52+
private static final ObjectWriter WRITER = ColumnSerializerUtils.SMILE_MAPPER.writer(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS);
53+
4654
private static long computeHash(StructuredData data)
4755
{
4856
try {
49-
final byte[] bytes = ColumnSerializerUtils.SMILE_MAPPER.writeValueAsBytes(data.value);
57+
final byte[] bytes = WRITER.writeValueAsBytes(data.value);
5058
return HASH_FUNCTION.hash(bytes, 0, bytes.length, SEED);
5159
}
5260
catch (JsonProcessingException e) {

processing/src/test/java/org/apache/druid/segment/nested/StructuredDataTest.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,18 @@ public void testCompareTo()
9595

9696
}
9797

98+
@Test
99+
public void testCompareToWithDifferentJSONOrder()
100+
{
101+
StructuredData sd0 = new StructuredData(ImmutableMap.of("D", 0.0, "E", 0.0, "F", 0.0, "A", 0.0, "B", 0.0, "C", 0.0));
102+
StructuredData sd1 = new StructuredData(ImmutableMap.of("A", 0.0, "B", 0.0, "C", 0.0, "D", 0.0, "E", 0.0, "F", 0.0));
103+
StructuredData sd2 = new StructuredData(ImmutableMap.of("A", 0.0, "B", 0.0, "C", 0.0, "D", 34304.0, "E", 34304.0, "F", 34304.0));
104+
105+
Assert.assertEquals(1, sd0.compareTo(sd2));
106+
Assert.assertEquals(1, sd1.compareTo(sd2));
107+
Assert.assertEquals(0, sd0.compareTo(sd1));
108+
}
109+
98110
@Test
99111
public void testEqualsAndHashcode()
100112
{

0 commit comments

Comments
 (0)