diff --git a/firebase-firestore/CHANGELOG.md b/firebase-firestore/CHANGELOG.md index f14e653e79f..939f70c93bb 100644 --- a/firebase-firestore/CHANGELOG.md +++ b/firebase-firestore/CHANGELOG.md @@ -1,4 +1,5 @@ # Unreleased +* [fixed] Use lazy encoding in UTF-8 encoded byte comparison for strings to solve performance issues. [#6706](//github.com/firebase/firebase-android-sdk/pull/6706) * [changed] Updated `protolite-well-known-types` dependency to `18.0.1`. [#6716] diff --git a/firebase-firestore/src/androidTest/java/com/google/firebase/firestore/FirestoreTest.java b/firebase-firestore/src/androidTest/java/com/google/firebase/firestore/FirestoreTest.java index 796632e192e..95dcd2863fe 100644 --- a/firebase-firestore/src/androidTest/java/com/google/firebase/firestore/FirestoreTest.java +++ b/firebase-firestore/src/androidTest/java/com/google/firebase/firestore/FirestoreTest.java @@ -1658,17 +1658,33 @@ public void sdkOrdersQueryByDocumentIdTheSameWayOnlineAndOffline() { public void snapshotListenerSortsUnicodeStringsAsServer() { Map> testDocs = map( - "a", map("value", "Łukasiewicz"), - "b", map("value", "Sierpiński"), - "c", map("value", "岩澤"), - "d", map("value", "🄟"), - "e", map("value", "P"), - "f", map("value", "︒"), - "g", map("value", "🐵")); + "a", + map("value", "Łukasiewicz"), + "b", + map("value", "Sierpiński"), + "c", + map("value", "岩澤"), + "d", + map("value", "🄟"), + "e", + map("value", "P"), + "f", + map("value", "︒"), + "g", + map("value", "🐵"), + "h", + map("value", "你好"), + "i", + map("value", "你顥"), + "j", + map("value", "😁"), + "k", + map("value", "😀")); CollectionReference colRef = testCollectionWithDocs(testDocs); Query orderedQuery = colRef.orderBy("value"); - List expectedDocIds = Arrays.asList("b", "a", "c", "f", "e", "d", "g"); + List expectedDocIds = + Arrays.asList("b", "a", "h", "i", "c", "f", "e", "d", "g", "k", "j"); QuerySnapshot getSnapshot = waitFor(orderedQuery.get()); List getSnapshotDocIds = @@ -1699,17 +1715,33 @@ public void snapshotListenerSortsUnicodeStringsAsServer() { public void snapshotListenerSortsUnicodeStringsInArrayAsServer() { Map> testDocs = map( - "a", map("value", Arrays.asList("Łukasiewicz")), - "b", map("value", Arrays.asList("Sierpiński")), - "c", map("value", Arrays.asList("岩澤")), - "d", map("value", Arrays.asList("🄟")), - "e", map("value", Arrays.asList("P")), - "f", map("value", Arrays.asList("︒")), - "g", map("value", Arrays.asList("🐵"))); + "a", + map("value", Arrays.asList("Łukasiewicz")), + "b", + map("value", Arrays.asList("Sierpiński")), + "c", + map("value", Arrays.asList("岩澤")), + "d", + map("value", Arrays.asList("🄟")), + "e", + map("value", Arrays.asList("P")), + "f", + map("value", Arrays.asList("︒")), + "g", + map("value", Arrays.asList("🐵")), + "h", + map("value", Arrays.asList("你好")), + "i", + map("value", Arrays.asList("你顥")), + "j", + map("value", Arrays.asList("😁")), + "k", + map("value", Arrays.asList("😀"))); CollectionReference colRef = testCollectionWithDocs(testDocs); Query orderedQuery = colRef.orderBy("value"); - List expectedDocIds = Arrays.asList("b", "a", "c", "f", "e", "d", "g"); + List expectedDocIds = + Arrays.asList("b", "a", "h", "i", "c", "f", "e", "d", "g", "k", "j"); QuerySnapshot getSnapshot = waitFor(orderedQuery.get()); List getSnapshotDocIds = @@ -1740,17 +1772,33 @@ public void snapshotListenerSortsUnicodeStringsInArrayAsServer() { public void snapshotListenerSortsUnicodeStringsInMapAsServer() { Map> testDocs = map( - "a", map("value", map("foo", "Łukasiewicz")), - "b", map("value", map("foo", "Sierpiński")), - "c", map("value", map("foo", "岩澤")), - "d", map("value", map("foo", "🄟")), - "e", map("value", map("foo", "P")), - "f", map("value", map("foo", "︒")), - "g", map("value", map("foo", "🐵"))); + "a", + map("value", map("foo", "Łukasiewicz")), + "b", + map("value", map("foo", "Sierpiński")), + "c", + map("value", map("foo", "岩澤")), + "d", + map("value", map("foo", "🄟")), + "e", + map("value", map("foo", "P")), + "f", + map("value", map("foo", "︒")), + "g", + map("value", map("foo", "🐵")), + "h", + map("value", map("foo", "你好")), + "i", + map("value", map("foo", "你顥")), + "j", + map("value", map("foo", "😁")), + "k", + map("value", map("foo", "😀"))); CollectionReference colRef = testCollectionWithDocs(testDocs); Query orderedQuery = colRef.orderBy("value"); - List expectedDocIds = Arrays.asList("b", "a", "c", "f", "e", "d", "g"); + List expectedDocIds = + Arrays.asList("b", "a", "h", "i", "c", "f", "e", "d", "g", "k", "j"); QuerySnapshot getSnapshot = waitFor(orderedQuery.get()); List getSnapshotDocIds = @@ -1781,17 +1829,33 @@ public void snapshotListenerSortsUnicodeStringsInMapAsServer() { public void snapshotListenerSortsUnicodeStringsInMapKeyAsServer() { Map> testDocs = map( - "a", map("value", map("Łukasiewicz", "foo")), - "b", map("value", map("Sierpiński", "foo")), - "c", map("value", map("岩澤", "foo")), - "d", map("value", map("🄟", "foo")), - "e", map("value", map("P", "foo")), - "f", map("value", map("︒", "foo")), - "g", map("value", map("🐵", "foo"))); + "a", + map("value", map("Łukasiewicz", "foo")), + "b", + map("value", map("Sierpiński", "foo")), + "c", + map("value", map("岩澤", "foo")), + "d", + map("value", map("🄟", "foo")), + "e", + map("value", map("P", "foo")), + "f", + map("value", map("︒", "foo")), + "g", + map("value", map("🐵", "foo")), + "h", + map("value", map("你好", "foo")), + "i", + map("value", map("你顥", "foo")), + "j", + map("value", map("😁", "foo")), + "k", + map("value", map("😀", "foo"))); CollectionReference colRef = testCollectionWithDocs(testDocs); Query orderedQuery = colRef.orderBy("value"); - List expectedDocIds = Arrays.asList("b", "a", "c", "f", "e", "d", "g"); + List expectedDocIds = + Arrays.asList("b", "a", "h", "i", "c", "f", "e", "d", "g", "k", "j"); QuerySnapshot getSnapshot = waitFor(orderedQuery.get()); List getSnapshotDocIds = @@ -1822,18 +1886,83 @@ public void snapshotListenerSortsUnicodeStringsInMapKeyAsServer() { public void snapshotListenerSortsUnicodeStringsInDocumentKeyAsServer() { Map> testDocs = map( - "Łukasiewicz", map("value", "foo"), - "Sierpiński", map("value", "foo"), - "岩澤", map("value", "foo"), - "🄟", map("value", "foo"), - "P", map("value", "foo"), - "︒", map("value", "foo"), - "🐵", map("value", "foo")); + "Łukasiewicz", + map("value", "foo"), + "Sierpiński", + map("value", "foo"), + "岩澤", + map("value", "foo"), + "🄟", + map("value", "foo"), + "P", + map("value", "foo"), + "︒", + map("value", "foo"), + "🐵", + map("value", "foo"), + "你好", + map("value", "foo"), + "你顥", + map("value", "foo"), + "😁", + map("value", "foo"), + "😀", + map("value", "foo")); CollectionReference colRef = testCollectionWithDocs(testDocs); Query orderedQuery = colRef.orderBy(FieldPath.documentId()); List expectedDocIds = - Arrays.asList("Sierpiński", "Łukasiewicz", "岩澤", "︒", "P", "🄟", "🐵"); + Arrays.asList( + "Sierpiński", "Łukasiewicz", "你好", "你顥", "岩澤", "︒", "P", "🄟", "🐵", "😀", "😁"); + + QuerySnapshot getSnapshot = waitFor(orderedQuery.get()); + List getSnapshotDocIds = + getSnapshot.getDocuments().stream().map(ds -> ds.getId()).collect(Collectors.toList()); + + EventAccumulator eventAccumulator = new EventAccumulator(); + ListenerRegistration registration = + orderedQuery.addSnapshotListener(eventAccumulator.listener()); + + List watchSnapshotDocIds = new ArrayList<>(); + try { + QuerySnapshot watchSnapshot = eventAccumulator.await(); + watchSnapshotDocIds = + watchSnapshot.getDocuments().stream() + .map(documentSnapshot -> documentSnapshot.getId()) + .collect(Collectors.toList()); + } finally { + registration.remove(); + } + + assertTrue(getSnapshotDocIds.equals(expectedDocIds)); + assertTrue(watchSnapshotDocIds.equals(expectedDocIds)); + + checkOnlineAndOfflineResultsMatch(orderedQuery, expectedDocIds.toArray(new String[0])); + } + + @Test + public void snapshotListenerSortsInvalidUnicodeStringsAsServer() { + // Note: Protocol Buffer converts any invalid surrogates to "?". + Map> testDocs = + map( + "a", + map("value", "Z"), + "b", + map("value", "你好"), + "c", + map("value", "😀"), + "d", + map("value", "ab\uD800"), // Lone high surrogate + "e", + map("value", "ab\uDC00"), // Lone low surrogate + "f", + map("value", "ab\uD800\uD800"), // Unpaired high surrogate + "g", + map("value", "ab\uDC00\uDC00")); // Unpaired low surrogate + + CollectionReference colRef = testCollectionWithDocs(testDocs); + Query orderedQuery = colRef.orderBy("value"); + List expectedDocIds = Arrays.asList("a", "d", "e", "f", "g", "b", "c"); QuerySnapshot getSnapshot = waitFor(orderedQuery.get()); List getSnapshotDocIds = diff --git a/firebase-firestore/src/main/java/com/google/firebase/firestore/util/Util.java b/firebase-firestore/src/main/java/com/google/firebase/firestore/util/Util.java index 543da11e7d3..2cc39337002 100644 --- a/firebase-firestore/src/main/java/com/google/firebase/firestore/util/Util.java +++ b/firebase-firestore/src/main/java/com/google/firebase/firestore/util/Util.java @@ -87,9 +87,44 @@ public static int compareIntegers(int i1, int i2) { /** Compare strings in UTF-8 encoded byte order */ public static int compareUtf8Strings(String left, String right) { - ByteString leftBytes = ByteString.copyFromUtf8(left); - ByteString rightBytes = ByteString.copyFromUtf8(right); - return compareByteStrings(leftBytes, rightBytes); + int i = 0; + while (i < left.length() && i < right.length()) { + int leftCodePoint = left.codePointAt(i); + int rightCodePoint = right.codePointAt(i); + + if (leftCodePoint != rightCodePoint) { + if (leftCodePoint < 128 && rightCodePoint < 128) { + // ASCII comparison + return Integer.compare(leftCodePoint, rightCodePoint); + } else { + // substring and do UTF-8 encoded byte comparison + ByteString leftBytes = ByteString.copyFromUtf8(getUtf8SafeBytes(left, i)); + ByteString rightBytes = ByteString.copyFromUtf8(getUtf8SafeBytes(right, i)); + int comp = compareByteStrings(leftBytes, rightBytes); + if (comp != 0) { + return comp; + } else { + // EXTREMELY RARE CASE: Code points differ, but their UTF-8 byte representations are + // identical. This can happen with malformed input (invalid surrogate pairs), where + // Java's encoding leads to unexpected byte sequences. Meanwhile, any invalid surrogate + // inputs get converted to "?" by protocol buffer while round tripping, so we almost + // never receive invalid strings from backend. + // Fallback to code point comparison for graceful handling. + return Integer.compare(leftCodePoint, rightCodePoint); + } + } + } + // Increment by 2 for surrogate pairs, 1 otherwise. + i += Character.charCount(leftCodePoint); + } + + // Compare lengths if all characters are equal + return Integer.compare(left.length(), right.length()); + } + + private static String getUtf8SafeBytes(String str, int index) { + int firstCodePoint = str.codePointAt(index); + return str.substring(index, index + Character.charCount(firstCodePoint)); } /** diff --git a/firebase-firestore/src/test/java/com/google/firebase/firestore/util/UtilTest.java b/firebase-firestore/src/test/java/com/google/firebase/firestore/util/UtilTest.java index 6ff424ef994..ccd88854ba7 100644 --- a/firebase-firestore/src/test/java/com/google/firebase/firestore/util/UtilTest.java +++ b/firebase-firestore/src/test/java/com/google/firebase/firestore/util/UtilTest.java @@ -17,6 +17,7 @@ import static com.google.common.truth.Truth.assertThat; import static com.google.firebase.firestore.util.Util.firstNEntries; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; import com.google.firebase.firestore.testutil.TestUtil; import com.google.protobuf.ByteString; @@ -26,6 +27,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Random; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -87,4 +89,184 @@ private void validateDiffCollection(List before, List after) { Util.diffCollections(before, after, String::compareTo, result::add, result::remove); assertThat(result).containsExactlyElementsIn(after); } + + @Test + public void compareUtf8StringsShouldReturnCorrectValue() { + ArrayList errors = new ArrayList<>(); + int seed = new Random().nextInt(Integer.MAX_VALUE); + int passCount = 0; + StringGenerator stringGenerator = new StringGenerator(29750468); + StringPairGenerator stringPairGenerator = new StringPairGenerator(stringGenerator); + for (int i = 0; i < 1_000_000 && errors.size() < 10; i++) { + StringPairGenerator.StringPair stringPair = stringPairGenerator.next(); + final String s1 = stringPair.s1; + final String s2 = stringPair.s2; + + int actual = Util.compareUtf8Strings(s1, s2); + + ByteString b1 = ByteString.copyFromUtf8(s1); + ByteString b2 = ByteString.copyFromUtf8(s2); + int expected = Util.compareByteStrings(b1, b2); + + if (actual == expected) { + passCount++; + } else { + errors.add( + "compareUtf8Strings(s1=\"" + + s1 + + "\", s2=\"" + + s2 + + "\") returned " + + actual + + ", but expected " + + expected + + " (i=" + + i + + ", s1.length=" + + s1.length() + + ", s2.length=" + + s2.length() + + ")"); + } + } + + if (!errors.isEmpty()) { + StringBuilder sb = new StringBuilder(); + sb.append(errors.size()).append(" test cases failed, "); + sb.append(passCount).append(" test cases passed, "); + sb.append("seed=").append(seed).append(";"); + for (int i = 0; i < errors.size(); i++) { + sb.append("\nerrors[").append(i).append("]: ").append(errors.get(i)); + } + fail(sb.toString()); + } + } + + private static class StringPairGenerator { + + private final StringGenerator stringGenerator; + + public StringPairGenerator(StringGenerator stringGenerator) { + this.stringGenerator = stringGenerator; + } + + public StringPair next() { + String prefix = stringGenerator.next(); + String s1 = prefix + stringGenerator.next(); + String s2 = prefix + stringGenerator.next(); + return new StringPair(s1, s2); + } + + public static class StringPair { + public final String s1, s2; + + public StringPair(String s1, String s2) { + this.s1 = s1; + this.s2 = s2; + } + } + } + + private static class StringGenerator { + + private static final float DEFAULT_SURROGATE_PAIR_PROBABILITY = 0.33f; + private static final int DEFAULT_MAX_LENGTH = 20; + + private static final int MIN_HIGH_SURROGATE = 0xD800; + private static final int MAX_HIGH_SURROGATE = 0xDBFF; + private static final int MIN_LOW_SURROGATE = 0xDC00; + private static final int MAX_LOW_SURROGATE = 0xDFFF; + + private final Random rnd; + private final float surrogatePairProbability; + private final int maxLength; + + public StringGenerator(int seed) { + this(new Random(seed), DEFAULT_SURROGATE_PAIR_PROBABILITY, DEFAULT_MAX_LENGTH); + } + + public StringGenerator(Random rnd, float surrogatePairProbability, int maxLength) { + this.rnd = rnd; + this.surrogatePairProbability = validateProbability(surrogatePairProbability); + this.maxLength = validateLength(maxLength); + } + + private static float validateProbability(float probability) { + if (!Float.isFinite(probability)) { + throw new IllegalArgumentException( + "invalid surrogate pair probability: " + + probability + + " (must be between 0.0 and 1.0, inclusive)"); + } else if (probability < 0.0f) { + throw new IllegalArgumentException( + "invalid surrogate pair probability: " + + probability + + " (must be greater than or equal to zero)"); + } else if (probability > 1.0f) { + throw new IllegalArgumentException( + "invalid surrogate pair probability: " + + probability + + " (must be less than or equal to 1)"); + } + return probability; + } + + private static int validateLength(int length) { + if (length < 0) { + throw new IllegalArgumentException( + "invalid maximum string length: " + + length + + " (must be greater than or equal to zero)"); + } + return length; + } + + public String next() { + final int length = rnd.nextInt(maxLength + 1); + final StringBuilder sb = new StringBuilder(); + while (sb.length() < length) { + int codePoint = nextCodePoint(); + sb.appendCodePoint(codePoint); + } + return sb.toString(); + } + + private boolean isNextSurrogatePair() { + return nextBoolean(rnd, surrogatePairProbability); + } + + private static boolean nextBoolean(Random rnd, float probability) { + if (probability == 0.0f) { + return false; + } else if (probability == 1.0f) { + return true; + } else { + return rnd.nextFloat() < probability; + } + } + + private int nextCodePoint() { + if (isNextSurrogatePair()) { + return nextSurrogateCodePoint(); + } else { + return nextNonSurrogateCodePoint(); + } + } + + private int nextSurrogateCodePoint() { + int highSurrogate = + rnd.nextInt(MAX_HIGH_SURROGATE - MIN_HIGH_SURROGATE + 1) + MIN_HIGH_SURROGATE; + int lowSurrogate = rnd.nextInt(MAX_LOW_SURROGATE - MIN_LOW_SURROGATE + 1) + MIN_LOW_SURROGATE; + return Character.toCodePoint((char) highSurrogate, (char) lowSurrogate); + } + + private int nextNonSurrogateCodePoint() { + int codePoint; + do { + codePoint = rnd.nextInt(0x10000); // BMP range + } while (codePoint >= MIN_HIGH_SURROGATE + && codePoint <= MAX_LOW_SURROGATE); // Exclude surrogate range + return codePoint; + } + } }