Commit 39408f5

Handle large write batches (#215)
* Handle reading large mutation batches in chunks. Unfortunately, SQLiteStatement.executeForBlobFileDescriptor is unreliable, sometimes returning null even when the row exists and the blob has a value (!!).
1 parent 4465f60 commit 39408f5
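
For background on the approach this commit takes, here is a minimal, self-contained sketch of the SUBSTR-based chunked blob read, independent of the Firestore classes below. The table and column names ("blobs", "data", "id") are hypothetical; only the chunking pattern mirrors the commit.

import android.database.Cursor;
import android.database.sqlite.SQLiteDatabase;
import java.io.ByteArrayOutputStream;

final class ChunkedBlobReader {
  // Stay safely under Android's ~2 MB CursorWindow per-row limit.
  private static final int CHUNK_SIZE = 1_000_000;

  /** Reads an arbitrarily large BLOB one SUBSTR() chunk at a time. */
  static byte[] readBlob(SQLiteDatabase db, long id) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    int start = 1; // SUBSTR() indexes from 1, not 0.
    while (true) {
      // start and CHUNK_SIZE are integers under our control, so inlining them is safe.
      String sql =
          "SELECT SUBSTR(data, " + start + ", " + CHUNK_SIZE + ") FROM blobs WHERE id = ?";
      try (Cursor c = db.rawQuery(sql, new String[] {String.valueOf(id)})) {
        if (!c.moveToFirst()) {
          return out.toByteArray(); // Row does not exist.
        }
        byte[] chunk = c.getBlob(0);
        if (chunk == null) {
          return out.toByteArray();
        }
        out.write(chunk, 0, chunk.length);
        if (chunk.length < CHUNK_SIZE) {
          return out.toByteArray(); // A short chunk means the blob is exhausted.
        }
        start += CHUNK_SIZE;
      }
    }
  }
}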

File tree: 4 files changed, +148 -26 lines

firebase-firestore/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
@@ -1,4 +1,6 @@
 # Unreleased
+- [fixed] Fixed an issue where Firestore would crash if handling write batches
+  larger than 2 MB in size (#208).
 
 # 18.0.0
 - [changed] The `timestampsInSnapshotsEnabled` setting is now enabled by

firebase-firestore/src/androidTest/java/com/google/firebase/firestore/WriteBatchTest.java

Lines changed: 38 additions & 0 deletions
@@ -32,7 +32,10 @@
 import com.google.firebase.firestore.FirebaseFirestoreException.Code;
 import com.google.firebase.firestore.testutil.EventAccumulator;
 import com.google.firebase.firestore.testutil.IntegrationTestUtil;
+import com.google.firebase.firestore.util.Util;
 import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
 import org.junit.After;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -253,4 +256,39 @@ public void testCanWriteTheSameDocumentMultipleTimes() {
     assertNotNull(when);
     assertEquals(map("a", 1L, "b", 2L, "when", when), serverSnap.getData());
   }
+
+  @Test
+  public void testCanWriteVeryLargeBatches() {
+    // On Android, SQLite Cursors are limited to reading no more than 2 MB per row (despite
+    // being able to write very large values). This test verifies that the SQLiteMutationQueue
+    // properly works around this limitation.
+
+    // Create a map containing nearly 1 MB of data. Note that if you use 1024 below this will
+    // create a document larger than 1 MB, which will be rejected by the backend as too large.
+    String a = Character.toString('a');
+    StringBuilder buf = new StringBuilder(1000);
+    for (int i = 0; i < 1000; i++) {
+      buf.append(a);
+    }
+    String kb = buf.toString();
+    Map<String, Object> values = new HashMap<>();
+    for (int j = 0; j < 1000; j++) {
+      values.put(Util.autoId(), kb);
+    }
+
+    DocumentReference doc = testDocument();
+    WriteBatch batch = doc.getFirestore().batch();
+
+    // Write a batch containing 3 copies of the data, creating a ~3 MB batch. Writing to the
+    // same document in a batch is allowed, and so long as the net size of the document stays
+    // under 1 MB, the batch is allowed.
+    batch.set(doc, values);
+    for (int i = 0; i < 2; i++) {
+      batch.update(doc, values);
+    }
+
+    waitFor(batch.commit());
+    DocumentSnapshot snap = waitFor(doc.get());
+    assertEquals(values, snap.getData());
+  }
 }
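
For context, a rough sketch of the size arithmetic behind this test. The 20-character auto ID length and the per-field overhead model are assumptions based on Firestore's documented document-size calculation, not taken from the SDK:

public class BatchSizeMath {
  public static void main(String[] args) {
    int entries = 1000;
    int keyBytes = 20 + 1;     // autoId() keys are 20 ASCII chars, +1 byte field overhead
    int valueBytes = 1000 + 1; // each value is 1000 one-byte chars, +1 byte overhead
    int approxDocBytes = entries * (keyBytes + valueBytes);
    // ~1,022,000 bytes: just under the backend's 1 MiB (1,048,576 byte) document
    // limit, while the batch as a whole (one set() plus two update() calls)
    // serializes to roughly 3 MB, well over SQLite's 2 MB cursor row limit.
    System.out.printf("approx document size: %,d bytes%n", approxDocBytes);
  }
}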

firebase-firestore/src/main/java/com/google/firebase/firestore/local/SQLiteMutationQueue.java

Lines changed: 108 additions & 24 deletions
@@ -18,6 +18,7 @@
 import static com.google.firebase.firestore.util.Assert.fail;
 import static com.google.firebase.firestore.util.Assert.hardAssert;
 
+import android.database.Cursor;
 import android.database.sqlite.SQLiteStatement;
 import com.google.firebase.Timestamp;
 import com.google.firebase.firestore.auth.User;
@@ -27,6 +28,7 @@
 import com.google.firebase.firestore.model.mutation.Mutation;
 import com.google.firebase.firestore.model.mutation.MutationBatch;
 import com.google.firebase.firestore.remote.WriteStream;
+import com.google.firebase.firestore.util.Consumer;
 import com.google.firebase.firestore.util.Util;
 import com.google.protobuf.ByteString;
 import com.google.protobuf.InvalidProtocolBufferException;
@@ -42,6 +44,18 @@
 /** A mutation queue for a specific user, backed by SQLite. */
 final class SQLiteMutationQueue implements MutationQueue {
 
+  /**
+   * On Android, SQLite Cursors are limited to reading no more than 2 MB per row (despite being
+   * able to write very large values). All reads of the mutations column in the mutations table
+   * need to read in chunks with SUBSTR to avoid going over this limit.
+   *
+   * <p>The value here has to be 2 MB or smaller, while allowing for all possible other values
+   * that might be selected out along with the mutations column in any given result set. Nearly
+   * 1 MB is conservative, but allows all combinations of document paths and batch ids without
+   * needing to figure out if the row has gotten too large.
+   */
+  private static final int BLOB_MAX_INLINE_LENGTH = 1000000;
+
   private final SQLitePersistence db;
   private final LocalSerializer serializer;
 
@@ -90,10 +104,7 @@ public void start() {
     int rows =
         db.query("SELECT last_stream_token FROM mutation_queues WHERE uid = ?")
             .binding(uid)
-            .first(
-                row -> {
-                  lastStreamToken = ByteString.copyFrom(row.getBlob(0));
-                });
+            .first(row -> lastStreamToken = ByteString.copyFrom(row.getBlob(0)));
 
     if (rows == 0) {
       // Ensure we write a default entry in mutation_queues since loadNextBatchIdAcrossAllUsers()
@@ -204,9 +215,9 @@ public MutationBatch addMutationBatch(Timestamp localWriteTime, List<Mutation> m
   @Nullable
   @Override
   public MutationBatch lookupMutationBatch(int batchId) {
-    return db.query("SELECT mutations FROM mutations WHERE uid = ? AND batch_id = ?")
-        .binding(uid, batchId)
-        .firstValue(row -> decodeMutationBatch(row.getBlob(0)));
+    return db.query("SELECT SUBSTR(mutations, 1, ?) FROM mutations WHERE uid = ? AND batch_id = ?")
+        .binding(BLOB_MAX_INLINE_LENGTH, uid, batchId)
+        .firstValue(row -> decodeInlineMutationBatch(batchId, row.getBlob(0)));
   }
 
   @Nullable
@@ -215,19 +226,22 @@ public MutationBatch getNextMutationBatchAfterBatchId(int batchId) {
     int nextBatchId = batchId + 1;
 
     return db.query(
-            "SELECT mutations FROM mutations "
+            "SELECT batch_id, SUBSTR(mutations, 1, ?) FROM mutations "
                 + "WHERE uid = ? AND batch_id >= ? "
                 + "ORDER BY batch_id ASC LIMIT 1")
-        .binding(uid, nextBatchId)
-        .firstValue(row -> decodeMutationBatch(row.getBlob(0)));
+        .binding(BLOB_MAX_INLINE_LENGTH, uid, nextBatchId)
+        .firstValue(row -> decodeInlineMutationBatch(row.getInt(0), row.getBlob(1)));
   }
 
   @Override
   public List<MutationBatch> getAllMutationBatches() {
     List<MutationBatch> result = new ArrayList<>();
-    db.query("SELECT mutations FROM mutations WHERE uid = ? ORDER BY batch_id ASC")
-        .binding(uid)
-        .forEach(row -> result.add(decodeMutationBatch(row.getBlob(0))));
+    db.query(
+            "SELECT batch_id, SUBSTR(mutations, 1, ?) "
+                + "FROM mutations "
+                + "WHERE uid = ? ORDER BY batch_id ASC")
+        .binding(BLOB_MAX_INLINE_LENGTH, uid)
+        .forEach(row -> result.add(decodeInlineMutationBatch(row.getInt(0), row.getBlob(1))));
     return result;
   }
 
@@ -237,14 +251,15 @@ public List<MutationBatch> getAllMutationBatchesAffectingDocumentKey(DocumentKey
 
     List<MutationBatch> result = new ArrayList<>();
     db.query(
-            "SELECT m.mutations FROM document_mutations dm, mutations m "
+            "SELECT m.batch_id, SUBSTR(m.mutations, 1, ?) "
+                + "FROM document_mutations dm, mutations m "
                 + "WHERE dm.uid = ? "
                 + "AND dm.path = ? "
                 + "AND dm.uid = m.uid "
                 + "AND dm.batch_id = m.batch_id "
                 + "ORDER BY dm.batch_id")
-        .binding(uid, path)
-        .forEach(row -> result.add(decodeMutationBatch(row.getBlob(0))));
+        .binding(BLOB_MAX_INLINE_LENGTH, uid, path)
+        .forEach(row -> result.add(decodeInlineMutationBatch(row.getInt(0), row.getBlob(1))));
     return result;
   }
 
@@ -259,10 +274,11 @@ public List<MutationBatch> getAllMutationBatchesAffectingDocumentKeys(
     SQLitePersistence.LongQuery longQuery =
         new SQLitePersistence.LongQuery(
             db,
-            "SELECT DISTINCT dm.batch_id, m.mutations FROM document_mutations dm, mutations m "
+            "SELECT DISTINCT dm.batch_id, SUBSTR(m.mutations, 1, ?) "
+                + "FROM document_mutations dm, mutations m "
                 + "WHERE dm.uid = ? "
                 + "AND dm.path IN (",
-            Arrays.asList(uid),
+            Arrays.asList(BLOB_MAX_INLINE_LENGTH, uid),
             args,
             ") "
                 + "AND dm.uid = m.uid "
@@ -279,7 +295,7 @@ public List<MutationBatch> getAllMutationBatchesAffectingDocumentKeys(
               int batchId = row.getInt(0);
               if (!uniqueBatchIds.contains(batchId)) {
                 uniqueBatchIds.add(batchId);
-                result.add(decodeMutationBatch(row.getBlob(1)));
+                result.add(decodeInlineMutationBatch(batchId, row.getBlob(1)));
               }
             });
     }
@@ -321,14 +337,15 @@ public List<MutationBatch> getAllMutationBatchesAffectingQuery(Query query) {
 
     List<MutationBatch> result = new ArrayList<>();
     db.query(
-            "SELECT dm.batch_id, dm.path, m.mutations FROM document_mutations dm, mutations m "
+            "SELECT dm.batch_id, dm.path, SUBSTR(m.mutations, 1, ?) "
+                + "FROM document_mutations dm, mutations m "
                 + "WHERE dm.uid = ? "
                 + "AND dm.path >= ? "
                 + "AND dm.path < ? "
                 + "AND dm.uid = m.uid "
                 + "AND dm.batch_id = m.batch_id "
                 + "ORDER BY dm.batch_id")
-        .binding(uid, prefixPath, prefixSuccessorPath)
+        .binding(BLOB_MAX_INLINE_LENGTH, uid, prefixPath, prefixSuccessorPath)
         .forEach(
             row -> {
               // Ensure unique batches only. This works because the batches come out in order so we
@@ -350,7 +367,7 @@ public List<MutationBatch> getAllMutationBatchesAffectingQuery(Query query) {
                 return;
               }
 
-              result.add(decodeMutationBatch(row.getBlob(2)));
+              result.add(decodeInlineMutationBatch(batchId, row.getBlob(2)));
             });
 
     return result;
@@ -399,12 +416,79 @@ public void performConsistencyCheck() {
         danglingMutationReferences);
   }
 
-  private MutationBatch decodeMutationBatch(byte[] bytes) {
+  /**
+   * Decodes mutation batch bytes obtained via substring. If the blob is at least
+   * BLOB_MAX_INLINE_LENGTH bytes, executes additional queries to load the rest of the blob.
+   *
+   * @param batchId The batch ID of the row containing the bytes, for fallback lookup if the value
+   *     is too large.
+   * @param bytes The bytes of the first chunk of the mutation batch. Should be obtained via
+   *     SUBSTR(mutations, 1, BLOB_MAX_INLINE_LENGTH).
+   */
+  private MutationBatch decodeInlineMutationBatch(int batchId, byte[] bytes) {
     try {
+      if (bytes.length < BLOB_MAX_INLINE_LENGTH) {
+        return serializer.decodeMutationBatch(
+            com.google.firebase.firestore.proto.WriteBatch.parseFrom(bytes));
+      }
+
+      BlobAccumulator accumulator = new BlobAccumulator(bytes);
+      while (accumulator.more) {
+        // As we read in chunks the start of the next chunk should be the total accumulated length
+        // plus 1 (since SUBSTR() counts from 1). The second argument is not adjusted because it's
+        // the length of the chunk, not the end index.
+        int start = accumulator.numChunks() * BLOB_MAX_INLINE_LENGTH + 1;
+
+        db.query("SELECT SUBSTR(mutations, ?, ?) FROM mutations WHERE uid = ? AND batch_id = ?")
+            .binding(start, BLOB_MAX_INLINE_LENGTH, uid, batchId)
+            .first(accumulator);
+      }
+
+      ByteString blob = accumulator.result();
       return serializer.decodeMutationBatch(
-          com.google.firebase.firestore.proto.WriteBatch.parseFrom(bytes));
+          com.google.firebase.firestore.proto.WriteBatch.parseFrom(blob));
     } catch (InvalidProtocolBufferException e) {
       throw fail("MutationBatch failed to parse: %s", e);
     }
   }
+
+  /**
+   * Explicit consumer of blob chunks, accumulating the chunks and wrapping them in a single
+   * ByteString. Accepts a Cursor whose results include the blob in column 0.
+   *
+   * <p>(This is a named class here to allow decodeInlineMutationBatch to access the result of the
+   * accumulation.)
+   */
+  private static class BlobAccumulator implements Consumer<Cursor> {
+    private final ArrayList<ByteString> chunks = new ArrayList<>();
+    private boolean more = true;
+
+    BlobAccumulator(byte[] firstChunk) {
+      addChunk(firstChunk);
+    }
+
+    int numChunks() {
+      return chunks.size();
+    }
+
+    ByteString result() {
+      // Not actually a copy; this creates a balanced rope-like structure that reuses the given
+      // ByteStrings as a part of its representation.
+      return ByteString.copyFrom(chunks);
+    }
+
+    @Override
+    public void accept(Cursor row) {
+      byte[] bytes = row.getBlob(0);
+      addChunk(bytes);
+      if (bytes.length < BLOB_MAX_INLINE_LENGTH) {
+        more = false;
+      }
+    }
+
+    private void addChunk(byte[] bytes) {
+      ByteString wrapped = ByteString.copyFrom(bytes);
+      chunks.add(wrapped);
+    }
  }
 }
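
The rope behavior that BlobAccumulator.result() relies on can be seen in isolation with plain protobuf-java. This sketch is illustrative only; the chunk contents are made up:

import com.google.protobuf.ByteString;
import java.util.ArrayList;
import java.util.List;

public class RopeConcatDemo {
  public static void main(String[] args) {
    List<ByteString> chunks = new ArrayList<>();
    chunks.add(ByteString.copyFromUtf8("first-chunk-"));
    chunks.add(ByteString.copyFromUtf8("second-chunk-"));
    chunks.add(ByteString.copyFromUtf8("tail"));

    // Despite the name, copyFrom(Iterable<ByteString>) does not flatten the
    // chunks into one new array; it concatenates the existing ByteStrings into
    // a balanced tree (rope) that reuses them as part of its representation.
    ByteString whole = ByteString.copyFrom(chunks);
    System.out.println(whole.toStringUtf8()); // first-chunk-second-chunk-tail
  }
}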

firebase-firestore/src/test/java/com/google/firebase/firestore/local/MutationQueueTestCase.java

Lines changed: 0 additions & 2 deletions
@@ -27,7 +27,6 @@
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertThat;
 import static org.junit.Assert.assertTrue;
 
 import com.google.firebase.Timestamp;
@@ -326,7 +325,6 @@ public void testAllMutationBatchesAffectingQuery_withCompoundBatches() {
   @Test
   public void testRemoveMutationBatches() {
     List<MutationBatch> batches = createBatches(10);
-    MutationBatch last = batches.get(batches.size() - 1);
 
     removeMutationBatches(batches.remove(0));
     assertEquals(9, batchCount());