diff --git a/assets/l10n/app_en.arb b/assets/l10n/app_en.arb index ce6d8e7e68..d1a08ded77 100644 --- a/assets/l10n/app_en.arb +++ b/assets/l10n/app_en.arb @@ -188,6 +188,19 @@ "error": {"type": "String", "example": "Invalid format"} } }, + "errorHandlingEventTitle": "Error handling a Zulip event. Retrying connection…", + "@errorHandlingEventTitle": { + "description": "Error title on failing to handle a Zulip server event." + }, + "errorHandlingEventDetails": "Error handling a Zulip event from {serverUrl}; will retry.\n\nError: {error}\n\nEvent: {event}", + "@errorHandlingEventDetails": { + "description": "Error details on failing to handle a Zulip server event.", + "placeholders": { + "serverUrl": {"type": "String", "example": "https://chat.example.com"}, + "error": {"type": "String", "example": "Unexpected null value"}, + "event": {"type": "String", "example": "UpdateMessageEvent(id: 123, messageIds: [2345, 3456], newTopic: 'dinner')"} + } + }, "errorSharingFailed": "Sharing failed", "@errorSharingFailed": { "description": "Error message when sharing a message failed." diff --git a/lib/api/exception.dart b/lib/api/exception.dart index f38d33ebe3..cda75b4b07 100644 --- a/lib/api/exception.dart +++ b/lib/api/exception.dart @@ -91,7 +91,7 @@ class NetworkException extends ApiRequestException { /// This should always represent either some kind of operational issue /// on the server, or a bug in the server where its responses don't /// agree with the documented API. -abstract class ServerException extends ApiRequestException { +sealed class ServerException extends ApiRequestException { final int httpStatus; /// The response body, decoded as a JSON object. diff --git a/lib/api/model/events.dart b/lib/api/model/events.dart index 31d7a47917..a90869cc3b 100644 --- a/lib/api/model/events.dart +++ b/lib/api/model/events.dart @@ -215,6 +215,7 @@ sealed class RealmUserEvent extends Event { @JsonSerializable(fieldRename: FieldRename.snake) class RealmUserAddEvent extends RealmUserEvent { @override + @JsonKey(includeToJson: true) String get op => 'add'; final User person; @@ -231,6 +232,7 @@ class RealmUserAddEvent extends RealmUserEvent { /// A [RealmUserEvent] with op `remove`: https://zulip.com/api/get-events#realm_user-remove class RealmUserRemoveEvent extends RealmUserEvent { @override + @JsonKey(includeToJson: true) String get op => 'remove'; final int userId; @@ -268,6 +270,7 @@ class RealmUserUpdateCustomProfileField { @JsonSerializable(fieldRename: FieldRename.snake) class RealmUserUpdateEvent extends RealmUserEvent { @override + @JsonKey(includeToJson: true) String get op => 'update'; @JsonKey(readValue: _readFromPerson) final int userId; @@ -351,6 +354,7 @@ sealed class ChannelEvent extends Event { @JsonSerializable(fieldRename: FieldRename.snake) class ChannelCreateEvent extends ChannelEvent { @override + @JsonKey(includeToJson: true) String get op => 'create'; final List streams; @@ -368,6 +372,7 @@ class ChannelCreateEvent extends ChannelEvent { @JsonSerializable(fieldRename: FieldRename.snake) class ChannelDeleteEvent extends ChannelEvent { @override + @JsonKey(includeToJson: true) String get op => 'delete'; final List streams; @@ -385,6 +390,7 @@ class ChannelDeleteEvent extends ChannelEvent { @JsonSerializable(fieldRename: FieldRename.snake) class ChannelUpdateEvent extends ChannelEvent { @override + @JsonKey(includeToJson: true) String get op => 'update'; final int streamId; @@ -666,6 +672,7 @@ class UserTopicEvent extends Event { // in order to skip the boilerplate in [fromJson] and [toJson]. class MessageEvent extends Event { @override + @JsonKey(includeToJson: true) String get type => 'message'; // In the server API, the `flags` field appears directly on the event rather @@ -857,6 +864,7 @@ sealed class UpdateMessageFlagsEvent extends Event { @JsonSerializable(fieldRename: FieldRename.snake) class UpdateMessageFlagsAddEvent extends UpdateMessageFlagsEvent { @override + @JsonKey(includeToJson: true) String get op => 'add'; final bool all; @@ -879,6 +887,7 @@ class UpdateMessageFlagsAddEvent extends UpdateMessageFlagsEvent { @JsonSerializable(fieldRename: FieldRename.snake) class UpdateMessageFlagsRemoveEvent extends UpdateMessageFlagsEvent { @override + @JsonKey(includeToJson: true) String get op => 'remove'; // final bool all; // deprecated, ignore diff --git a/lib/api/model/events.g.dart b/lib/api/model/events.g.dart index ce11092ef4..43fdfbfbe3 100644 --- a/lib/api/model/events.g.dart +++ b/lib/api/model/events.g.dart @@ -94,6 +94,7 @@ Map _$RealmUserAddEventToJson(RealmUserAddEvent instance) => { 'id': instance.id, 'type': instance.type, + 'op': instance.op, 'person': instance.person, }; @@ -160,6 +161,7 @@ Map _$RealmUserUpdateEventToJson( { 'id': instance.id, 'type': instance.type, + 'op': instance.op, 'user_id': instance.userId, 'full_name': instance.fullName, 'avatar_url': instance.avatarUrl, @@ -210,6 +212,7 @@ Map _$ChannelCreateEventToJson(ChannelCreateEvent instance) => { 'id': instance.id, 'type': instance.type, + 'op': instance.op, 'streams': instance.streams, }; @@ -225,6 +228,7 @@ Map _$ChannelDeleteEventToJson(ChannelDeleteEvent instance) => { 'id': instance.id, 'type': instance.type, + 'op': instance.op, 'streams': instance.streams, }; @@ -247,6 +251,7 @@ Map _$ChannelUpdateEventToJson(ChannelUpdateEvent instance) => { 'id': instance.id, 'type': instance.type, + 'op': instance.op, 'stream_id': instance.streamId, 'name': instance.name, 'property': _$ChannelPropertyNameEnumMap[instance.property], @@ -517,6 +522,7 @@ Map _$UpdateMessageFlagsAddEventToJson( 'type': instance.type, 'flag': instance.flag, 'messages': instance.messages, + 'op': instance.op, 'all': instance.all, }; @@ -544,6 +550,7 @@ Map _$UpdateMessageFlagsRemoveEventToJson( 'type': instance.type, 'flag': instance.flag, 'messages': instance.messages, + 'op': instance.op, 'message_details': instance.messageDetails?.map((k, e) => MapEntry(k.toString(), e)), }; diff --git a/lib/generated/l10n/zulip_localizations.dart b/lib/generated/l10n/zulip_localizations.dart index ad7421b187..74856a64e0 100644 --- a/lib/generated/l10n/zulip_localizations.dart +++ b/lib/generated/l10n/zulip_localizations.dart @@ -343,6 +343,18 @@ abstract class ZulipLocalizations { /// **'Error connecting to Zulip at {serverUrl}. Will retry:\n\n{error}'** String errorConnectingToServerDetails(String serverUrl, String error); + /// Error title on failing to handle a Zulip server event. + /// + /// In en, this message translates to: + /// **'Error handling a Zulip event. Retrying connection…'** + String get errorHandlingEventTitle; + + /// Error details on failing to handle a Zulip server event. + /// + /// In en, this message translates to: + /// **'Error handling a Zulip event from {serverUrl}; will retry.\n\nError: {error}\n\nEvent: {event}'** + String errorHandlingEventDetails(String serverUrl, String error, String event); + /// Error message when sharing a message failed. /// /// In en, this message translates to: diff --git a/lib/generated/l10n/zulip_localizations_ar.dart b/lib/generated/l10n/zulip_localizations_ar.dart index 70d48f7412..09e4ae82e9 100644 --- a/lib/generated/l10n/zulip_localizations_ar.dart +++ b/lib/generated/l10n/zulip_localizations_ar.dart @@ -157,6 +157,14 @@ class ZulipLocalizationsAr extends ZulipLocalizations { return 'Error connecting to Zulip at $serverUrl. Will retry:\n\n$error'; } + @override + String get errorHandlingEventTitle => 'Error handling a Zulip event. Retrying connection…'; + + @override + String errorHandlingEventDetails(String serverUrl, String error, String event) { + return 'Error handling a Zulip event from $serverUrl; will retry.\n\nError: $error\n\nEvent: $event'; + } + @override String get errorSharingFailed => 'Sharing failed'; diff --git a/lib/generated/l10n/zulip_localizations_en.dart b/lib/generated/l10n/zulip_localizations_en.dart index c87194028f..da5fad3552 100644 --- a/lib/generated/l10n/zulip_localizations_en.dart +++ b/lib/generated/l10n/zulip_localizations_en.dart @@ -157,6 +157,14 @@ class ZulipLocalizationsEn extends ZulipLocalizations { return 'Error connecting to Zulip at $serverUrl. Will retry:\n\n$error'; } + @override + String get errorHandlingEventTitle => 'Error handling a Zulip event. Retrying connection…'; + + @override + String errorHandlingEventDetails(String serverUrl, String error, String event) { + return 'Error handling a Zulip event from $serverUrl; will retry.\n\nError: $error\n\nEvent: $event'; + } + @override String get errorSharingFailed => 'Sharing failed'; diff --git a/lib/generated/l10n/zulip_localizations_ja.dart b/lib/generated/l10n/zulip_localizations_ja.dart index 8cd12d2b37..693f0a36f9 100644 --- a/lib/generated/l10n/zulip_localizations_ja.dart +++ b/lib/generated/l10n/zulip_localizations_ja.dart @@ -157,6 +157,14 @@ class ZulipLocalizationsJa extends ZulipLocalizations { return 'Error connecting to Zulip at $serverUrl. Will retry:\n\n$error'; } + @override + String get errorHandlingEventTitle => 'Error handling a Zulip event. Retrying connection…'; + + @override + String errorHandlingEventDetails(String serverUrl, String error, String event) { + return 'Error handling a Zulip event from $serverUrl; will retry.\n\nError: $error\n\nEvent: $event'; + } + @override String get errorSharingFailed => 'Sharing failed'; diff --git a/lib/model/store.dart b/lib/model/store.dart index acb9d9bcae..55cae0d3ba 100644 --- a/lib/model/store.dart +++ b/lib/model/store.dart @@ -954,6 +954,7 @@ class UpdateMachine { } Completer? _debugLoopSignal; + Object? _debugLoopError; /// In debug mode, causes the polling loop to pause before the next /// request and wait for [debugAdvanceLoop] to be called. @@ -965,15 +966,45 @@ class UpdateMachine { }()); } + /// In debug mode, causes the next [debugAdvanceLoop] call to induce + /// the given error to be thrown from the polling loop. + void debugPrepareLoopError(Object error) { + assert(() { + assert(_debugLoopError == null); + _debugLoopError = error; + return true; + }()); + } + /// In debug mode, after a call to [debugPauseLoop], causes the /// polling loop to make one more request and then pause again. + /// + /// If [debugPrepareLoopError] was called since the last [debugAdvanceLoop] + /// or [debugPauseLoop], the polling loop will throw the prepared error + /// instead of making a request. void debugAdvanceLoop() { assert((){ - _debugLoopSignal!.complete(); + if (_debugLoopError != null) { + _debugLoopSignal!.completeError(_debugLoopError!); + } else { + _debugLoopSignal!.complete(); + } return true; }()); } + // This is static so that it persists through new UpdateMachine instances + // as we attempt to fix things by reloading data from scratch. In principle + // it could also be per-account (or per-realm or per-server); but currently + // we skip that complication, as well as attempting to reset backoff on + // later success. After all, these unexpected errors should be uncommon; + // ideally they'd never happen. + static BackoffMachine get _unexpectedErrorBackoffMachine { + return __unexpectedErrorBackoffMachine + ??= BackoffMachine(maxBound: const Duration(seconds: 60)); + } + static BackoffMachine? __unexpectedErrorBackoffMachine; + /// This controls when we start to report transient errors to the user when /// polling. /// @@ -983,120 +1014,188 @@ class UpdateMachine { void poll() async { assert(!_disposed); - - BackoffMachine? backoffMachine; - int accumulatedTransientFailureCount = 0; - - /// This only reports transient errors after reaching - /// a pre-defined threshold of retries. - void maybeReportTransientError(String? message, {String? details}) { - accumulatedTransientFailureCount++; - if (accumulatedTransientFailureCount > transientFailureCountNotifyThreshold) { - reportErrorToUserBriefly(message, details: details); + try { + BackoffMachine? backoffMachine; + int accumulatedTransientFailureCount = 0; + + /// This only reports transient errors after reaching + /// a pre-defined threshold of retries. + void maybeReportToUserTransientError(Object error) { + accumulatedTransientFailureCount++; + if (accumulatedTransientFailureCount > transientFailureCountNotifyThreshold) { + _reportToUserErrorConnectingToServer(error); + } } - } - while (true) { - if (_debugLoopSignal != null) { - await _debugLoopSignal!.future; - if (_disposed) return; - assert(() { - _debugLoopSignal = Completer(); - return true; - }()); - } + while (true) { + if (_debugLoopSignal != null) { + await _debugLoopSignal!.future; + if (_disposed) return; + assert(() { + _debugLoopSignal = Completer(); + return true; + }()); + } - final GetEventsResult result; - try { - result = await getEvents(store.connection, - queueId: queueId, lastEventId: lastEventId); - if (_disposed) return; - } catch (e) { - if (_disposed) return; + final GetEventsResult result; + try { + result = await getEvents(store.connection, + queueId: queueId, lastEventId: lastEventId); + if (_disposed) return; + } catch (e) { + if (_disposed) return; + store.isLoading = true; + + if (e is! ApiRequestException) { + // Some unexpected error, outside even making the HTTP request. + // Definitely a bug in our code. + rethrow; + } - store.isLoading = true; - final localizations = GlobalLocalizations.zulipLocalizations; - final serverUrl = store.connection.realmUrl.origin; - switch (e) { - case ZulipApiException(code: 'BAD_EVENT_QUEUE_ID'): - assert(debugLog('Lost event queue for $store. Replacing…')); - // This disposes the store, which disposes this update machine. - await store._globalStore._reloadPerAccount(store.accountId); - debugLog('… Event queue replaced.'); - return; - - case Server5xxException(): - assert(debugLog('Transient error polling event queue for $store: $e\n' - 'Backing off, then will retry…')); - maybeReportTransientError( - localizations.errorConnectingToServerShort, - details: localizations.errorConnectingToServerDetails( - serverUrl, e.toString())); - await (backoffMachine ??= BackoffMachine()).wait(); - assert(debugLog('… Backoff wait complete, retrying poll.')); - continue; - - case NetworkException(): - assert(debugLog('Transient error polling event queue for $store: $e\n' - 'Backing off, then will retry…')); - if (e.cause is! SocketException) { - // Heuristic check to only report interesting errors to the user. + bool shouldReportToUser; + switch (e) { + case NetworkException(cause: SocketException()): // A [SocketException] is common when the app returns from sleep. - maybeReportTransientError( - localizations.errorConnectingToServerShort, - details: localizations.errorConnectingToServerDetails( - serverUrl, e.toString())); - } - await (backoffMachine ??= BackoffMachine()).wait(); - assert(debugLog('… Backoff wait complete, retrying poll.')); - continue; - - default: - assert(debugLog('Error polling event queue for $store: $e\n' - 'Backing off and retrying even though may be hopeless…')); - // TODO(#186): Handle unrecoverable failures - reportErrorToUserBriefly( - localizations.errorConnectingToServerShort, - details: localizations.errorConnectingToServerDetails( - serverUrl, e.toString())); - await (backoffMachine ??= BackoffMachine()).wait(); - assert(debugLog('… Backoff wait complete, retrying poll.')); - continue; + shouldReportToUser = false; + + case NetworkException(): + case Server5xxException(): + shouldReportToUser = true; + + case ServerException(httpStatus: 429): + case ZulipApiException(httpStatus: 429): + case ZulipApiException(code: 'RATE_LIMIT_HIT'): + // TODO(#946) handle rate-limit errors more generally, in ApiConnection + shouldReportToUser = true; + + case ZulipApiException(code: 'BAD_EVENT_QUEUE_ID'): + rethrow; + + case ZulipApiException(): + case MalformedServerResponseException(): + // Either a 4xx we didn't expect, or a malformed response; + // in either case, a mismatch of the client's expectations to the + // server's behavior, and therefore a bug in one or the other. + // TODO(#1054) handle auth failures specifically + rethrow; + } + + assert(debugLog('Transient error polling event queue for $store: $e\n' + 'Backing off, then will retry…')); + if (shouldReportToUser) { + maybeReportToUserTransientError(e); + } + await (backoffMachine ??= BackoffMachine()).wait(); + if (_disposed) return; + assert(debugLog('… Backoff wait complete, retrying poll.')); + continue; + } + + // After one successful request, we reset backoff to its initial state. + // That way if the user is off the network and comes back on, the app + // doesn't wind up in a state where it's slow to recover the next time + // one request fails. + // + // This does mean that if the server is having trouble and handling some + // but not all of its requests, we'll end up doing a lot more retries than + // if we stayed at the max backoff interval; partway toward what would + // happen if we weren't backing off at all. + // + // But at least for [getEvents] requests, as here, it should be OK, + // because this is a long-poll. That means a typical successful request + // takes a long time to come back; in fact longer than our max backoff + // duration (which is 10 seconds). So if we're getting a mix of successes + // and failures, the successes themselves should space out the requests. + backoffMachine = null; + + store.isLoading = false; + // Dismiss existing errors, if any. + reportErrorToUserBriefly(null); + accumulatedTransientFailureCount = 0; + + final events = result.events; + for (final event in events) { + try { + await store.handleEvent(event); + if (_disposed) return; + } catch (e, stackTrace) { + if (_disposed) return; + Error.throwWithStackTrace( + _EventHandlingException(cause: e, event: event), stackTrace); + } + } + if (events.isNotEmpty) { + lastEventId = events.last.id; } } + } catch (e) { + if (_disposed) return; + + // An error occurred, other than the transient request errors we retry on. + // This means either a lost/expired event queue on the server (which is + // normal after the app is offline for a period like 10 minutes), + // or an unexpected exception representing a bug in our code or the server. + // Either way, the show must go on. So reload server data from scratch. + + // First, log what happened. + store.isLoading = true; + bool isUnexpected; + switch (e) { + case ZulipApiException(code: 'BAD_EVENT_QUEUE_ID'): + assert(debugLog('Lost event queue for $store. Replacing…')); + // The old event queue is gone, so we need a new one. This is normal. + isUnexpected = false; + + case _EventHandlingException(:final cause, :final event): + assert(debugLog('BUG: Error handling an event: $cause\n' // TODO(log) + ' event: $event\n' + 'Replacing event queue…')); + reportErrorToUserBriefly( + GlobalLocalizations.zulipLocalizations.errorHandlingEventTitle, + details: GlobalLocalizations.zulipLocalizations.errorHandlingEventDetails( + store.realmUrl.toString(), cause.toString(), event.toString())); + // We can't just continue with the next event, because our state + // may be garbled due to failing to apply this one (and worse, + // any invariants that were left in a broken state from where + // the exception was thrown). So reload from scratch. + // Hopefully (probably?) the bug only affects our implementation of + // the *change* in state represented by the event, and when we get the + // new state in a fresh InitialSnapshot we'll handle that just fine. + isUnexpected = true; + + default: + assert(debugLog('BUG: Unexpected error in event polling: $e\n' // TODO(log) + 'Replacing event queue…')); + _reportToUserErrorConnectingToServer(e); + // Similar story to the _EventHandlingException case; + // separate only so that that other case can print more context. + // The bug here could be in the server if it's an ApiRequestException, + // but our remedy is the same either way. + isUnexpected = true; + } - // After one successful request, we reset backoff to its initial state. - // That way if the user is off the network and comes back on, the app - // doesn't wind up in a state where it's slow to recover the next time - // one request fails. - // - // This does mean that if the server is having trouble and handling some - // but not all of its requests, we'll end up doing a lot more retries than - // if we stayed at the max backoff interval; partway toward what would - // happen if we weren't backing off at all. - // - // But at least for [getEvents] requests, as here, it should be OK, - // because this is a long-poll. That means a typical successful request - // takes a long time to come back; in fact longer than our max backoff - // duration (which is 10 seconds). So if we're getting a mix of successes - // and failures, the successes themselves should space out the requests. - backoffMachine = null; - store.isLoading = false; - // Dismiss existing errors, if any. - reportErrorToUserBriefly(null); - accumulatedTransientFailureCount = 0; - - final events = result.events; - for (final event in events) { - await store.handleEvent(event); + if (isUnexpected) { + // We don't know the cause of the failure; it might well keep happening. + // Avoid creating a retry storm. + await _unexpectedErrorBackoffMachine.wait(); if (_disposed) return; } - if (events.isNotEmpty) { - lastEventId = events.last.id; - } + + // This disposes the store, which disposes this update machine. + await store._globalStore._reloadPerAccount(store.accountId); + assert(debugLog('… Event queue replaced.')); + return; } } + void _reportToUserErrorConnectingToServer(Object error) { + final localizations = GlobalLocalizations.zulipLocalizations; + reportErrorToUserBriefly( + localizations.errorConnectingToServerShort, + details: localizations.errorConnectingToServerDetails( + store.realmUrl.toString(), error.toString())); + } + /// Send this client's notification token to the server, now and if it changes. /// /// TODO The returned future isn't especially meaningful (it may or may not @@ -1176,3 +1275,10 @@ class UpdateMachine { @override String toString() => '${objectRuntimeType(this, 'UpdateMachine')}#${shortHash(this)}'; } + +class _EventHandlingException implements Exception { + final Object cause; + final Event event; + + _EventHandlingException({required this.cause, required this.event}); +} diff --git a/test/example_data.dart b/test/example_data.dart index c7739b1928..159b14aa61 100644 --- a/test/example_data.dart +++ b/test/example_data.dart @@ -18,6 +18,10 @@ void _checkPositive(int? value, String description) { assert(value == null || value > 0, '$description should be positive'); } +Object nullCheckError() { + try { null!; } catch (e) { return e; } // ignore: null_check_always_fails +} + //////////////////////////////////////////////////////////////// // Realm-wide (or server-wide) metadata. // diff --git a/test/model/store_test.dart b/test/model/store_test.dart index ac9e64aa2a..10e8698360 100644 --- a/test/model/store_test.dart +++ b/test/model/store_test.dart @@ -580,12 +580,14 @@ void main() { connection = store.connection as FakeApiConnection; } - Future prepareStore({int? lastEventId}) async { + Future preparePoll({int? lastEventId}) async { globalStore = TestGlobalStore(accounts: []); await globalStore.add(eg.selfAccount, eg.initialSnapshot( lastEventId: lastEventId)); await globalStore.perAccount(eg.selfAccount.id); updateFromGlobalStore(); + updateMachine.debugPauseLoop(); + updateMachine.poll(); } void checkLastRequest({required int lastEventId}) { @@ -599,12 +601,9 @@ void main() { } test('loops on success', () => awaitFakeAsync((async) async { - await prepareStore(lastEventId: 1); + await preparePoll(lastEventId: 1); check(updateMachine.lastEventId).equals(1); - updateMachine.debugPauseLoop(); - updateMachine.poll(); - // Loop makes first request, and processes result. connection.prepare(json: GetEventsResult(events: [ HeartbeatEvent(id: 2), @@ -612,7 +611,7 @@ void main() { updateMachine.debugAdvanceLoop(); async.flushMicrotasks(); checkLastRequest(lastEventId: 1); - await Future.delayed(Duration.zero); + async.elapse(Duration.zero); check(updateMachine.lastEventId).equals(2); // Loop makes second request, and processes result. @@ -622,14 +621,12 @@ void main() { updateMachine.debugAdvanceLoop(); async.flushMicrotasks(); checkLastRequest(lastEventId: 2); - await Future.delayed(Duration.zero); + async.elapse(Duration.zero); check(updateMachine.lastEventId).equals(3); })); test('handles events', () => awaitFakeAsync((async) async { - await prepareStore(); - updateMachine.debugPauseLoop(); - updateMachine.poll(); + await preparePoll(); // Pick some arbitrary event and check it gets processed on the store. check(store.userSettings!.twentyFourHourTime).isFalse(); @@ -638,78 +635,51 @@ void main() { property: UserSettingName.twentyFourHourTime, value: true), ], queueId: null).toJson()); updateMachine.debugAdvanceLoop(); - async.flushMicrotasks(); - await Future.delayed(Duration.zero); + async.elapse(Duration.zero); check(store.userSettings!.twentyFourHourTime).isTrue(); })); - test('handles expired queue', () => awaitFakeAsync((async) async { - await prepareStore(); - updateMachine.debugPauseLoop(); - updateMachine.poll(); - check(globalStore.perAccountSync(store.accountId)).identicalTo(store); - - // Let the server expire the event queue. - connection.prepare(httpStatus: 400, json: { - 'result': 'error', 'code': 'BAD_EVENT_QUEUE_ID', - 'queue_id': updateMachine.queueId, - 'msg': 'Bad event queue ID: ${updateMachine.queueId}', - }); - updateMachine.debugAdvanceLoop(); - async.flushMicrotasks(); - await Future.delayed(Duration.zero); - check(store).isLoading.isTrue(); - - // The global store has a new store. - check(globalStore.perAccountSync(store.accountId)).not((it) => it.identicalTo(store)); - updateFromGlobalStore(); - check(store).isLoading.isFalse(); + void checkReload(FutureOr Function() prepareError, { + bool expectBackoff = true, + }) { + awaitFakeAsync((async) async { + await preparePoll(); + check(globalStore.perAccountSync(store.accountId)).identicalTo(store); - // The new UpdateMachine updates the new store. - updateMachine.debugPauseLoop(); - updateMachine.poll(); - check(store.userSettings!.twentyFourHourTime).isFalse(); - connection.prepare(json: GetEventsResult(events: [ - UserSettingsUpdateEvent(id: 2, - property: UserSettingName.twentyFourHourTime, value: true), - ], queueId: null).toJson()); - updateMachine.debugAdvanceLoop(); - async.flushMicrotasks(); - await Future.delayed(Duration.zero); - check(store.userSettings!.twentyFourHourTime).isTrue(); - })); + await prepareError(); + updateMachine.debugAdvanceLoop(); + async.elapse(Duration.zero); + check(store).isLoading.isTrue(); - test('expired queue disposes registered MessageListView instances', () => awaitFakeAsync((async) async { - // Regression test for: https://github.com/zulip/zulip-flutter/issues/810 - await prepareStore(); - updateMachine.debugPauseLoop(); - updateMachine.poll(); + if (expectBackoff) { + // The reload doesn't happen immediately; there's a timer. + check(globalStore.perAccountSync(store.accountId)).identicalTo(store); + check(async.pendingTimers).length.equals(1); + async.flushTimers(); + } - // Make sure there are [MessageListView]s in the message store. - MessageListView.init(store: store, narrow: const MentionsNarrow()); - MessageListView.init(store: store, narrow: const StarredMessagesNarrow()); - check(store.debugMessageListViews).length.equals(2); + // The global store has a new store. + check(globalStore.perAccountSync(store.accountId)).not((it) => it.identicalTo(store)); + updateFromGlobalStore(); + check(store).isLoading.isFalse(); - // Let the server expire the event queue. - connection.prepare(httpStatus: 400, json: { - 'result': 'error', 'code': 'BAD_EVENT_QUEUE_ID', - 'queue_id': updateMachine.queueId, - 'msg': 'Bad event queue ID: ${updateMachine.queueId}', + // The new UpdateMachine updates the new store. + updateMachine.debugPauseLoop(); + updateMachine.poll(); + check(store.userSettings!.twentyFourHourTime).isFalse(); + connection.prepare(json: GetEventsResult(events: [ + UserSettingsUpdateEvent(id: 2, + property: UserSettingName.twentyFourHourTime, value: true), + ], queueId: null).toJson()); + updateMachine.debugAdvanceLoop(); + async.elapse(Duration.zero); + check(store.userSettings!.twentyFourHourTime).isTrue(); }); - updateMachine.debugAdvanceLoop(); - async.flushMicrotasks(); - await Future.delayed(Duration.zero); - - // The old store's [MessageListView]s have been disposed. - // (And no exception was thrown; that was #810.) - check(store.debugMessageListViews).isEmpty(); - })); + } void checkRetry(void Function() prepareError) { awaitFakeAsync((async) async { - await prepareStore(lastEventId: 1); - updateMachine.debugPauseLoop(); - updateMachine.poll(); + await preparePoll(lastEventId: 1); check(async.pendingTimers).length.equals(0); // Make the request, inducing an error in it. @@ -737,23 +707,142 @@ void main() { }); } + // These cases are ordered by how far the request got before it failed. + + void prepareUnexpectedLoopError() { + updateMachine.debugPrepareLoopError(eg.nullCheckError()); + } + + void prepareNetworkExceptionSocketException() { + connection.prepare(exception: const SocketException('failed')); + } + + void prepareNetworkException() { + connection.prepare(exception: Exception("failed")); + } + + void prepareServer5xxException() { + connection.prepare(httpStatus: 500, body: 'splat'); + } + + void prepareMalformedServerResponseException() { + connection.prepare(httpStatus: 200, body: 'nonsense'); + } + + void prepareRateLimitExceptionCode() { + // Example from the Zulip API docs: + // https://zulip.com/api/rest-error-handling#rate-limit-exceeded + // (The actual HTTP status should be 429, but that seems undocumented.) + connection.prepare(httpStatus: 400, json: { + 'result': 'error', 'code': 'RATE_LIMIT_HIT', + 'msg': 'API usage exceeded rate limit', + 'retry-after': 28.706807374954224}); + } + + void prepareRateLimitExceptionStatus() { + // The HTTP status code for hitting a rate limit, + // but for some reason a boring BAD_REQUEST error body. + connection.prepare(httpStatus: 429, json: { + 'result': 'error', 'code': 'BAD_REQUEST', 'msg': 'Bad request'}); + } + + void prepareRateLimitExceptionMalformed() { + // The HTTP status code for hitting a rate limit, + // but for some reason a non-JSON body. + connection.prepare(httpStatus: 429, + body: 'An error occurred.'); + } + + void prepareZulipApiExceptionBadRequest() { + connection.prepare(httpStatus: 400, json: { + 'result': 'error', 'code': 'BAD_REQUEST', 'msg': 'Bad request'}); + } + + void prepareExpiredEventQueue() { + connection.prepare(httpStatus: 400, json: { + 'result': 'error', 'code': 'BAD_EVENT_QUEUE_ID', + 'queue_id': updateMachine.queueId, + 'msg': 'Bad event queue ID: ${updateMachine.queueId}', + }); + } + + Future prepareHandleEventError() async { + final stream = eg.stream(); + await store.addStream(stream); + // Set up a situation that breaks our data structures' invariants: + // a stream/channel found in the by-ID map is missing in the by-name map. + store.streamsByName.remove(stream.name); + // Then prepare an event on which handleEvent will throw + // because it hits that broken invariant. + connection.prepare(json: GetEventsResult(events: [ + ChannelDeleteEvent(id: 1, streams: [stream]), + ], queueId: null).toJson()); + } + + test('reloads on unexpected error within loop', () { + checkReload(prepareUnexpectedLoopError); + }); + + test('retries on NetworkException from SocketException', () { + // We skip reporting errors on these; check we retry them all the same. + checkRetry(prepareNetworkExceptionSocketException); + }); + + test('retries on generic NetworkException', () { + checkRetry(prepareNetworkException); + }); + test('retries on Server5xxException', () { - checkRetry(() => connection.prepare(httpStatus: 500, body: 'splat')); + checkRetry(prepareServer5xxException); + }); + + test('reloads on MalformedServerResponseException', () { + checkReload(prepareMalformedServerResponseException); + }); + + test('retries on rate limit: code RATE_LIMIT_HIT', () { + checkRetry(prepareRateLimitExceptionCode); }); - test('retries on NetworkException', () { - checkRetry(() => connection.prepare(exception: Exception("failed"))); + test('retries on rate limit: status 429 ZulipApiException', () { + checkRetry(prepareRateLimitExceptionStatus); }); - test('retries on ZulipApiException', () { - checkRetry(() => connection.prepare(httpStatus: 400, json: { - 'result': 'error', 'code': 'BAD_REQUEST', 'msg': 'Bad request'})); + test('retries on rate limit: status 429 MalformedServerResponseException', () { + checkRetry(prepareRateLimitExceptionMalformed); }); - test('retries on MalformedServerResponseException', () { - checkRetry(() => connection.prepare(httpStatus: 200, body: 'nonsense')); + test('reloads on generic ZulipApiException', () { + checkReload(prepareZulipApiExceptionBadRequest); }); + test('reloads immediately on expired queue', () { + checkReload(expectBackoff: false, prepareExpiredEventQueue); + }); + + test('reloads on handleEvent error', () { + checkReload(prepareHandleEventError); + }); + + test('expired queue disposes registered MessageListView instances', () => awaitFakeAsync((async) async { + // Regression test for: https://github.com/zulip/zulip-flutter/issues/810 + await preparePoll(); + + // Make sure there are [MessageListView]s in the message store. + MessageListView.init(store: store, narrow: const MentionsNarrow()); + MessageListView.init(store: store, narrow: const StarredMessagesNarrow()); + check(store.debugMessageListViews).length.equals(2); + + // Let the server expire the event queue. + prepareExpiredEventQueue(); + updateMachine.debugAdvanceLoop(); + async.elapse(Duration.zero); + + // The old store's [MessageListView]s have been disposed. + // (And no exception was thrown; that was #810.) + check(store.debugMessageListViews).isEmpty(); + })); + group('report error', () { String? lastReportedError; String? takeLastReportedError() { @@ -762,78 +851,145 @@ void main() { return result; } - /// This is an alternative to [ZulipApp]'s implementation of - /// [reportErrorToUserBriefly] for testing. - Future logAndReportErrorToUserBriefly(String? message, { - String? details, - }) async { + Future logReportedError(String? message, {String? details}) async { if (message == null) return; lastReportedError = '$message\n$details'; } Future prepare() async { - reportErrorToUserBriefly = logAndReportErrorToUserBriefly; + reportErrorToUserBriefly = logReportedError; addTearDown(() => reportErrorToUserBriefly = defaultReportErrorToUserBriefly); - - await prepareStore(lastEventId: 1); - updateMachine.debugPauseLoop(); - updateMachine.poll(); + await preparePoll(lastEventId: 1); } - void pollAndFail(FakeAsync async) { + void pollAndFail(FakeAsync async, {bool shouldCheckRequest = true}) { updateMachine.debugAdvanceLoop(); async.elapse(Duration.zero); - checkLastRequest(lastEventId: 1); + if (shouldCheckRequest) checkLastRequest(lastEventId: 1); check(store).isLoading.isTrue(); } - test('report non-transient errors', () => awaitFakeAsync((async) async { - await prepare(); + Subject checkReported(void Function() prepareError) { + return awaitFakeAsync((async) async { + await prepare(); + prepareError(); + // No need to check on the request; there's no later step of this test + // for it to be needed as setup for. + pollAndFail(async, shouldCheckRequest: false); + return check(takeLastReportedError()).isNotNull(); + }); + } + + Subject checkLateReported(void Function() prepareError) { + return awaitFakeAsync((async) async { + await prepare(); + + for (int i = 0; i < UpdateMachine.transientFailureCountNotifyThreshold; i++) { + prepareError(); + pollAndFail(async); + check(takeLastReportedError()).isNull(); + async.flushTimers(); + if (!identical(store, globalStore.perAccountSync(store.accountId))) { + // Store was reloaded. + updateFromGlobalStore(); + updateMachine.debugPauseLoop(); + updateMachine.poll(); + } + } + + prepareError(); + pollAndFail(async); + return check(takeLastReportedError()).isNotNull(); + }); + } + + void checkNotReported(void Function() prepareError) { + return awaitFakeAsync((async) async { + await prepare(); + + for (int i = 0; i < UpdateMachine.transientFailureCountNotifyThreshold; i++) { + prepareError(); + pollAndFail(async); + check(takeLastReportedError()).isNull(); + async.flushTimers(); + if (!identical(store, globalStore.perAccountSync(store.accountId))) { + // Store was reloaded. + updateFromGlobalStore(); + updateMachine.debugPauseLoop(); + updateMachine.poll(); + } + } + + prepareError(); + pollAndFail(async); + // Still no error reported, even after the same number of iterations + // where other errors get reported (as [checkLateReported] checks). + check(takeLastReportedError()).isNull(); + }); + } + + test('report unexpected error within loop', () { + checkReported(prepareUnexpectedLoopError); + }); + + test('ignore NetworkException from SocketException', () { + checkNotReported(prepareNetworkExceptionSocketException); + }); - connection.prepare(httpStatus: 400, json: { - 'result': 'error', 'code': 'BAD_REQUEST', 'msg': 'Bad request'}); - pollAndFail(async); - check(takeLastReportedError()).isNotNull().startsWith( + test('eventually report generic NetworkException', () { + checkLateReported(prepareNetworkException).startsWith( "Error connecting to Zulip. Retrying…\n" "Error connecting to Zulip at"); - })); + }); - test('report transient errors', () => awaitFakeAsync((async) async { - await prepare(); + test('eventually report Server5xxException', () { + checkLateReported(prepareServer5xxException).startsWith( + "Error connecting to Zulip. Retrying…\n" + "Error connecting to Zulip at"); + }); - // There should be no user visible error messages during these retries. - for (int i = 0; i < UpdateMachine.transientFailureCountNotifyThreshold; i++) { - connection.prepare(httpStatus: 500, body: 'splat'); - pollAndFail(async); - check(takeLastReportedError()).isNull(); - // This skips the pending polling backoff. - async.flushTimers(); - } + test('report MalformedServerResponseException', () { + checkReported(prepareMalformedServerResponseException).startsWith( + "Error connecting to Zulip. Retrying…\n" + "Error connecting to Zulip at"); + }); - connection.prepare(httpStatus: 500, body: 'splat'); - pollAndFail(async); - check(takeLastReportedError()).isNotNull().startsWith( + test('report rate limit: code RATE_LIMIT_HIT', () { + checkLateReported(prepareRateLimitExceptionCode).startsWith( "Error connecting to Zulip. Retrying…\n" "Error connecting to Zulip at"); - })); + }); - test('ignore boring errors', () => awaitFakeAsync((async) async { - await prepare(); + test('report rate limit: status 429 ZulipApiException', () { + checkLateReported(prepareRateLimitExceptionStatus).startsWith( + "Error connecting to Zulip. Retrying…\n" + "Error connecting to Zulip at"); + }); - for (int i = 0; i < UpdateMachine.transientFailureCountNotifyThreshold; i++) { - connection.prepare(exception: const SocketException('failed')); - pollAndFail(async); - check(takeLastReportedError()).isNull(); - // This skips the pending polling backoff. - async.flushTimers(); - } + test('report rate limit: status 429 MalformedServerResponseException', () { + checkLateReported(prepareRateLimitExceptionMalformed).startsWith( + "Error connecting to Zulip. Retrying…\n" + "Error connecting to Zulip at"); + }); - connection.prepare(exception: const SocketException('failed')); - pollAndFail(async); - // Normally we start showing user visible error messages for transient - // errors after enough number of retries. - check(takeLastReportedError()).isNull(); - })); + test('report generic ZulipApiException', () { + checkReported(prepareZulipApiExceptionBadRequest).startsWith( + "Error connecting to Zulip. Retrying…\n" + "Error connecting to Zulip at"); + }); + + test('ignore expired queue', () { + checkNotReported(prepareExpiredEventQueue); + }); + + test('nicely report handleEvent error', () { + checkReported(prepareHandleEventError).matchesPattern(RegExp( + r"Error handling a Zulip event\. Retrying connection…\n" + r"Error handling a Zulip event from \S+; will retry\.\n" + r"\n" + r"Error: .*channel\.dart.. Failed assertion.*" + )); + }); }); });